summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aldebaran.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c59
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c581
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c1791
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c335
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c77
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c331
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c242
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c191
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c870
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c264
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c200
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c1269
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h104
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c429
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c658
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c177
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c157
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c314
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c504
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c924
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h135
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c968
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c622
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c371
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c266
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c433
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c310
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h170
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c174
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c79
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c438
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c107
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c482
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h91
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm124
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c1125
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h56
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c974
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c130
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c203
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c466
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm153
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c405
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c87
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c129
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c96
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c259
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_0.c61
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v6_1.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ih_v7_0.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v11_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/imu_v12_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c228
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c116
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c119
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c607
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c112
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c114
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c1093
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h111
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.c355
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_userqueue.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c259
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v12_0.c374
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c92
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c100
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c113
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h208
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v12_0.c102
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c136
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v14_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c362
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c620
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c583
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c560
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c648
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c545
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c176
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_enums.h246
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sid.h1924
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c158
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15d.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c123
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc24.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.c325
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v12_0.h18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.c160
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v8_14.h51
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c67
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c70
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c314
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c303
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c318
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c1266
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c757
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c786
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c1004
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c772
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c756
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c1624
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega20_ih.c86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c108
286 files changed, 29482 insertions, 15946 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 0051fb1b437f..1acfed2f92ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -2,10 +2,13 @@
config DRM_AMDGPU
tristate "AMD GPU"
- depends on DRM && PCI && MMU
+ depends on DRM && PCI
depends on !UML
select FW_LOADER
+ select DRM_CLIENT
+ select DRM_CLIENT_SELECTION
select DRM_DISPLAY_DP_HELPER
+ select DRM_DISPLAY_DSC_HELPER
select DRM_DISPLAY_HDMI_HELPER
select DRM_DISPLAY_HDCP_HELPER
select DRM_DISPLAY_HELPER
@@ -23,6 +26,7 @@ config DRM_AMDGPU
select DRM_BUDDY
select DRM_SUBALLOC_HELPER
select DRM_EXEC
+ select DRM_PANEL_BACKLIGHT_QUIRKS
# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
# ACPI_VIDEO's dependencies must also be selected.
select INPUT if ACPI
@@ -64,7 +68,6 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- depends on MMU
select HMM_MIRROR
select MMU_NOTIFIER
help
@@ -73,7 +76,7 @@ config DRM_AMDGPU_USERPTR
config DRM_AMD_ISP
bool "Enable AMD Image Signal Processor IP support"
- depends on DRM_AMDGPU
+ depends on DRM_AMDGPU && ACPI
select MFD_CORE
select PM_GENERIC_DOMAINS if PM
help
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index c7b18c52825d..87080c06e5fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -1,5 +1,5 @@
#
-# Copyright 2017 Advanced Micro Devices, Inc.
+# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -65,7 +65,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
- amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -105,7 +106,7 @@ amdgpu-y += \
# add UMC block
amdgpu-y += \
- umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o
# add IH block
amdgpu-y += \
@@ -173,7 +174,10 @@ amdgpu-y += \
amdgpu-y += \
amdgpu_mes.o \
mes_v11_0.o \
- mes_v12_0.o
+ mes_v12_0.o \
+
+# add GFX userqueue support
+amdgpu-y += mes_userqueue.o
# add UVD block
amdgpu-y += \
@@ -200,6 +204,7 @@ amdgpu-y += \
vcn_v4_0_3.o \
vcn_v4_0_5.o \
vcn_v5_0_0.o \
+ vcn_v5_0_1.o \
amdgpu_jpeg.o \
jpeg_v1_0.o \
jpeg_v2_0.o \
@@ -208,7 +213,8 @@ amdgpu-y += \
jpeg_v4_0.o \
jpeg_v4_0_3.o \
jpeg_v4_0_5.o \
- jpeg_v5_0_0.o
+ jpeg_v5_0_0.o \
+ jpeg_v5_0_1.o
# add VPE block
amdgpu-y += \
@@ -250,6 +256,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
+# add gfx usermode queue
+amdgpu-y += amdgpu_userq.o
ifneq ($(CONFIG_HSA_AMD),)
AMDKFD_PATH := ../amdkfd
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index b0f95a7649bf..e13fbd974141 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -85,16 +85,9 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -246,7 +239,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
dev_err(adev->dev, "Failed to get BIF handle\n");
return -EINVAL;
}
- r = cmn_block->version->funcs->resume(adev);
+ r = amdgpu_ip_block_resume(cmn_block);
if (r)
return r;
@@ -282,15 +275,10 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- adev->ip_blocks[i].status.hw = true;
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -304,7 +292,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -342,8 +330,12 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
}
list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
+ /*TBD: Ideally should clear only GFX, SDMA blocks*/
+ amdgpu_ras_clear_err_state(tmp_adev);
r = aldebaran_mode2_restore_ip(tmp_adev);
if (r)
goto end;
@@ -387,6 +379,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
tmp_adev);
if (!r) {
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
@@ -417,6 +411,7 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = {
static struct amdgpu_reset_handler
*aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = {
&aldebaran_mode2_handler,
+ &xgmi_reset_on_init_handler,
};
int aldebaran_reset_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9b1e0ede05a4..a5ccd0ada16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -109,16 +109,19 @@
#include "amdgpu_mca.h"
#include "amdgpu_aca.h"
#include "amdgpu_ras.h"
+#include "amdgpu_cper.h"
#include "amdgpu_xcp.h"
#include "amdgpu_seq64.h"
#include "amdgpu_reg_state.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_eviction_fence.h"
#if defined(CONFIG_DRM_AMD_ISP)
#include "amdgpu_isp.h"
#endif
#define MAX_GPU_INSTANCE 64
-#define GFX_SLICE_PERIOD msecs_to_jiffies(250)
+#define GFX_SLICE_PERIOD_MS 250
struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
@@ -131,10 +134,6 @@ struct amdgpu_mgpu_info {
uint32_t num_gpu;
uint32_t num_dgpu;
uint32_t num_apu;
-
- /* delayed reset_func for XGMI configuration if necessary */
- struct delayed_work delayed_reset_work;
- bool pending_reset;
};
enum amdgpu_ss {
@@ -231,7 +230,7 @@ extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
extern int amdgpu_mtype_local;
-extern bool enforce_isolation;
+extern int amdgpu_enforce_isolation;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
@@ -269,8 +268,10 @@ extern int amdgpu_umsch_mm_fwlog;
extern int amdgpu_user_partt_mode;
extern int amdgpu_agp;
+extern int amdgpu_rebar;
extern int amdgpu_wbrf;
+extern int amdgpu_user_queue;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
@@ -303,6 +304,12 @@ extern int amdgpu_wbrf;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
+/* reset mask */
+#define AMDGPU_RESET_TYPE_FULL (1 << 0) /* full adapter reset, mode1/mode2/BACO/etc. */
+#define AMDGPU_RESET_TYPE_SOFT_RESET (1 << 1) /* IP level soft reset */
+#define AMDGPU_RESET_TYPE_PER_QUEUE (1 << 2) /* per queue */
+#define AMDGPU_RESET_TYPE_PER_PIPE (1 << 3) /* per pipe */
+
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
@@ -350,7 +357,6 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
@@ -365,8 +371,11 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block);
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
#define AMDGPU_MAX_IP_NUM 16
@@ -389,6 +398,7 @@ struct amdgpu_ip_block_version {
struct amdgpu_ip_block {
struct amdgpu_ip_block_status status;
const struct amdgpu_ip_block_version *version;
+ struct amdgpu_device *adev;
};
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
@@ -409,6 +419,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev);
bool amdgpu_read_bios(struct amdgpu_device *adev);
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes);
+void amdgpu_bios_release(struct amdgpu_device *adev);
/*
* Clocks
*/
@@ -481,7 +492,6 @@ struct amdgpu_flip_work {
bool async;
};
-
/*
* file private structure
*/
@@ -494,6 +504,11 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
+ struct amdgpu_userq_mgr userq_mgr;
+
+ /* Eviction fence infra */
+ struct amdgpu_eviction_fence_mgr evf_mgr;
+
/** GPU partition selection */
uint32_t xcp_id;
};
@@ -505,12 +520,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
*/
#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */
+/**
+ * amdgpu_wb - This struct is used for small GPU memory allocation.
+ *
+ * This struct is used to allocate a small amount of GPU memory that can be
+ * used to shadow certain states into the memory. This is especially useful for
+ * providing easy CPU access to some states without requiring register access
+ * (e.g., if some block is power gated, reading register may be problematic).
+ *
+ * Note: the term writeback was initially used because many of the amdgpu
+ * components had some level of writeback memory, and this struct initially
+ * described those components.
+ */
struct amdgpu_wb {
+
+ /**
+ * @wb_obj:
+ *
+ * Buffer Object used for the writeback memory.
+ */
struct amdgpu_bo *wb_obj;
+
+ /**
+ * @wb:
+ *
+ * Pointer to the first writeback slot. In terms of CPU address
+ * this value can be accessed directly by using the offset as an index.
+ * For the GPU address, it is necessary to use gpu_addr and the offset.
+ */
volatile uint32_t *wb;
+
+ /**
+ * @gpu_addr:
+ *
+ * Writeback base address in the GPU.
+ */
uint64_t gpu_addr;
- u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
+
+ /**
+ * @num_wb:
+ *
+ * Number of writeback slots reserved for amdgpu.
+ */
+ u32 num_wb;
+
+ /**
+ * @used:
+ *
+ * Track the writeback slot already used.
+ */
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+
+ /**
+ * @lock:
+ *
+ * Protects read and write of the used field array.
+ */
spinlock_t lock;
};
@@ -544,6 +609,7 @@ struct amdgpu_allowed_register_entry {
* are reset depends on the ASIC. Notably doesn't reset IPs
* shared with the CPU on APUs or the memory controllers (so
* VRAM is not lost). Not available on all ASICs.
+ * @AMD_RESET_LINK: Triggers SW-UP link reset on other GPUs
* @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card
* but without powering off the PCI bus. Suitable only for
* discrete GPUs.
@@ -561,8 +627,10 @@ enum amd_reset_method {
AMD_RESET_METHOD_MODE0,
AMD_RESET_METHOD_MODE1,
AMD_RESET_METHOD_MODE2,
+ AMD_RESET_METHOD_LINK,
AMD_RESET_METHOD_BACO,
AMD_RESET_METHOD_PCI,
+ AMD_RESET_METHOD_ON_INIT,
};
struct amdgpu_video_codec_info {
@@ -813,6 +881,11 @@ struct amdgpu_mqd_prop {
uint32_t hqd_queue_priority;
bool allow_tunneling;
bool hqd_active;
+ uint64_t shadow_addr;
+ uint64_t gds_bkup_addr;
+ uint64_t csa_addr;
+ uint64_t fence_address;
+ bool tmz_queue;
};
struct amdgpu_mqd {
@@ -821,11 +894,44 @@ struct amdgpu_mqd {
struct amdgpu_mqd_prop *p);
};
+struct amdgpu_pcie_reset_ctx {
+ bool in_link_reset;
+ bool occurs_dpc;
+ bool audio_suspended;
+};
+
+/*
+ * Custom Init levels could be defined for different situations where a full
+ * initialization of all hardware blocks are not expected. Sample cases are
+ * custom init sequences after resume after S0i3/S3, reset on initialization,
+ * partial reset of blocks etc. Presently, this defines only two levels. Levels
+ * are described in corresponding struct definitions - amdgpu_init_default,
+ * amdgpu_init_minimal_xgmi.
+ */
+enum amdgpu_init_lvl_id {
+ AMDGPU_INIT_LEVEL_DEFAULT,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+};
+
+struct amdgpu_init_level {
+ enum amdgpu_init_lvl_id level;
+ uint32_t hwini_ip_block_mask;
+};
+
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
struct amdgpu_reset_domain;
struct amdgpu_fru_info;
+enum amdgpu_enforce_isolation_mode {
+ AMDGPU_ENFORCE_ISOLATION_DISABLE = 0,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE = 1,
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY = 2,
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER = 3,
+};
+
+
/*
* Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
*/
@@ -854,6 +960,7 @@ struct amdgpu_device {
bool need_swiotlb;
bool accel_working;
struct notifier_block acpi_nb;
+ struct notifier_block pm_nb;
struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
struct debugfs_blob_wrapper debugfs_vbios_blob;
struct debugfs_blob_wrapper debugfs_discovery_blob;
@@ -1053,6 +1160,13 @@ struct amdgpu_device {
bool enable_uni_mes;
struct amdgpu_mes mes;
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
+ const struct amdgpu_userq_funcs *userq_funcs[AMDGPU_HW_IP_NUM];
+
+ /* xarray used to retrieve the user queue fence driver reference
+ * in the EOP interrupt handler to signal the particular user
+ * queue fence.
+ */
+ struct xarray userq_xa;
/* df */
struct amdgpu_df df;
@@ -1063,6 +1177,9 @@ struct amdgpu_device {
/* ACA */
struct amdgpu_aca aca;
+ /* CPER */
+ struct amdgpu_cper cper;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
uint32_t harvest_ip_mask;
int num_ip_blocks;
@@ -1092,8 +1209,7 @@ struct amdgpu_device {
bool in_s3;
bool in_s4;
bool in_s0ix;
- /* indicate amdgpu suspension status */
- bool suspend_complete;
+ suspend_state_t last_suspend_state;
enum pp_mp1_state mp1_state;
struct amdgpu_doorbell_index doorbell_index;
@@ -1124,11 +1240,14 @@ struct amdgpu_device {
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
uint32_t ras_enabled;
+ bool ras_default_ecc_enabled;
bool no_hw_access;
struct pci_saved_state *pci_state;
pci_channel_state_t pci_channel_state;
+ struct amdgpu_pcie_reset_ctx pcie_reset_ctx;
+
/* Track auto wait count on s_barrier settings */
bool barrier_has_auto_waitcnt;
@@ -1150,7 +1269,6 @@ struct amdgpu_device {
struct work_struct reset_work;
- bool job_hang;
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
@@ -1162,10 +1280,29 @@ struct amdgpu_device {
bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca;
bool debug_exp_resets;
+ bool debug_disable_gpu_ring_reset;
+ bool debug_vm_userptr;
- bool enforce_isolation[MAX_XCP];
- /* Added this mutex for cleaner shader isolation between GFX and compute processes */
+ /* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
+ enum amdgpu_enforce_isolation_mode enforce_isolation[MAX_XCP];
+ struct amdgpu_isolation {
+ void *owner;
+ struct dma_fence *spearhead;
+ struct amdgpu_sync active;
+ struct amdgpu_sync prev;
+ } isolation[MAX_XCP];
+
+ struct amdgpu_init_level *init_lvl;
+
+ /* This flag is used to determine how VRAM allocations are handled for APUs
+ * in KFD: VRAM or GTT.
+ */
+ bool apu_prefer_gtt;
+
+ struct list_head userq_mgr_list;
+ struct mutex userq_mutex;
+ bool userq_halt_for_enforce_isolation;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
@@ -1261,6 +1398,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context);
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context);
+
int emu_soc_asic_init(struct amdgpu_device *adev);
/*
@@ -1418,6 +1557,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
int amdgpu_device_mode1_reset(struct amdgpu_device *adev);
+int amdgpu_device_link_reset(struct amdgpu_device *adev);
bool amdgpu_device_supports_atpx(struct drm_device *dev);
bool amdgpu_device_supports_px(struct drm_device *dev);
bool amdgpu_device_supports_boco(struct drm_device *dev);
@@ -1442,7 +1582,12 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
@@ -1450,23 +1595,15 @@ void amdgpu_register_atpx_handler(void);
void amdgpu_unregister_atpx_handler(void);
bool amdgpu_has_atpx_dgpu_power_cntl(void);
bool amdgpu_is_atpx_hybrid(void);
-bool amdgpu_atpx_dgpu_req_power_for_displays(void);
bool amdgpu_has_atpx(void);
#else
static inline void amdgpu_register_atpx_handler(void) {}
static inline void amdgpu_unregister_atpx_handler(void) {}
static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
-static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
static inline bool amdgpu_has_atpx(void) { return false; }
#endif
-#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void);
-#else
-static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
-#endif
-
/*
* KMS
*/
@@ -1571,11 +1708,13 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap
#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
-void amdgpu_choose_low_power_state(struct amdgpu_device *adev);
#else
static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
-static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
+#endif
+
+#if defined(CONFIG_DRM_AMD_ISP)
+int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]);
#endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
@@ -1619,4 +1758,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
extern const struct attribute_group amdgpu_flash_attr_group;
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 2ca127173135..3835f2592914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -30,16 +30,6 @@
typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
-struct aca_banks {
- int nr_banks;
- struct list_head list;
-};
-
-struct aca_hwip {
- int hwid;
- int mcatype;
-};
-
static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = {
ACA_BANK_HWID(SMU, 0x01, 0x01),
ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00),
@@ -111,7 +101,7 @@ static struct aca_regs_dump {
{"STATUS", ACA_REG_IDX_STATUS},
{"ADDR", ACA_REG_IDX_ADDR},
{"MISC", ACA_REG_IDX_MISC0},
- {"CONFIG", ACA_REG_IDX_CONFG},
+ {"CONFIG", ACA_REG_IDX_CONFIG},
{"IPID", ACA_REG_IDX_IPID},
{"SYND", ACA_REG_IDX_SYND},
{"DESTAT", ACA_REG_IDX_DESTAT},
@@ -130,6 +120,9 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+
+ if (ACA_REG__STATUS__SCRUB(bank->regs[ACA_REG_IDX_STATUS]))
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "hardware error logged by the scrubber\n");
}
static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
@@ -158,7 +151,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
return -EINVAL;
}
- if (start + count >= max_count)
+ if (start + count > max_count)
return -EINVAL;
count = min_t(int, count, max_count);
@@ -168,7 +161,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_
if (ret)
return ret;
- bank.type = type;
+ bank.smu_err_type = type;
aca_smu_bank_dump(adev, i, count, &bank, qctx);
@@ -205,6 +198,10 @@ static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
+ /* Parse all deferred errors with UMC aca handle */
+ if (ACA_BANK_ERR_IS_DEFFERED(bank))
+ return handle->hwip == ACA_HWIP_TYPE_UMC;
+
if (!aca_bank_hwip_is_matched(bank, handle->hwip))
return false;
@@ -394,6 +391,56 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type
return ret;
}
+static void aca_banks_generate_cper(struct amdgpu_device *adev,
+ enum aca_smu_type type,
+ struct aca_banks *banks,
+ int count)
+{
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ int r;
+
+ if (!adev->cper.enabled)
+ return;
+
+ if (!banks || !count) {
+ dev_warn(adev->dev, "fail to generate cper records\n");
+ return;
+ }
+
+ /* UEs must be encoded into separate CPER entries */
+ if (type == ACA_SMU_TYPE_UE) {
+ struct aca_banks de_banks;
+
+ aca_banks_init(&de_banks);
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ r = aca_banks_add_bank(&de_banks, bank);
+ if (r)
+ dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+ } else {
+ if (amdgpu_cper_generate_ue_record(adev, bank))
+ dev_warn(adev->dev, "fail to generate ue cper records\n");
+ }
+ }
+
+ if (!list_empty(&de_banks.list)) {
+ if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+ dev_warn(adev->dev, "fail to generate de cper records\n");
+ }
+
+ aca_banks_release(&de_banks);
+ } else {
+ /*
+ * SMU_TYPE_CE banks are combined into 1 CPER entries,
+ * they could be CEs or DEs or both
+ */
+ if (amdgpu_cper_generate_ce_records(adev, banks, count))
+ dev_warn(adev->dev, "fail to generate ce cper records\n");
+ }
+}
+
static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
bank_handler_t handler, struct ras_query_context *qctx, void *data)
{
@@ -431,6 +478,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
if (ret)
goto err_release_banks;
+ aca_banks_generate_cper(adev, type, &banks, count);
+
err_release_banks:
aca_banks_release(&banks);
@@ -516,6 +565,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
if (ret)
return ret;
+ /* DEs may contain in CEs or UEs */
+ if (type != ACA_ERROR_TYPE_DEFERRED)
+ aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
return aca_log_aca_error(handle, type, err_data);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 5ef6b745f222..38c88897e1ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -71,12 +71,21 @@ struct ras_query_context;
#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE)
#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED)
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */
+#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
+#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
+#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+ (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
ACA_REG_IDX_ADDR = 2,
ACA_REG_IDX_MISC0 = 3,
- ACA_REG_IDX_CONFG = 4,
+ ACA_REG_IDX_CONFIG = 4,
ACA_REG_IDX_IPID = 5,
ACA_REG_IDX_SYND = 6,
ACA_REG_IDX_DESTAT = 8,
@@ -103,13 +112,20 @@ enum aca_error_type {
};
enum aca_smu_type {
+ ACA_SMU_TYPE_INVALID = -1,
ACA_SMU_TYPE_UE = 0,
ACA_SMU_TYPE_CE,
ACA_SMU_TYPE_COUNT,
};
+struct aca_hwip {
+ int hwid;
+ int mcatype;
+};
+
struct aca_bank {
- enum aca_smu_type type;
+ enum aca_error_type aca_err_type;
+ enum aca_smu_type smu_err_type;
u64 regs[ACA_MAX_REGS_COUNT];
};
@@ -118,6 +134,11 @@ struct aca_bank_node {
struct list_head node;
};
+struct aca_banks {
+ int nr_banks;
+ struct list_head list;
+};
+
struct aca_bank_info {
int die_id;
int socket_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index bf6c4a0d0525..4926996f94da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -98,9 +98,9 @@ enum {
ACP_TILE_DSP2,
};
-static int acp_sw_init(void *handle)
+static int acp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->acp.parent = adev->dev;
@@ -112,9 +112,9 @@ static int acp_sw_init(void *handle)
return 0;
}
-static int acp_sw_fini(void *handle)
+static int acp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->acp.cgs_device)
amdgpu_cgs_destroy_device(adev->acp.cgs_device);
@@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
@@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd)
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -219,10 +219,10 @@ static const struct dmi_system_id acp_quirk_table[] = {
/**
* acp_hw_init - start and test ACP block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_init(void *handle)
+static int acp_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
u64 acp_base;
@@ -230,19 +230,13 @@ static int acp_hw_init(void *handle)
u32 count = 0;
struct i2s_platform_data *i2s_pdata = NULL;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- const struct amdgpu_ip_block *ip_block =
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
-
- if (!ip_block)
- return -EINVAL;
+ struct amdgpu_device *adev = ip_block->adev;
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
if (r == -ENODEV) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
} else if (r) {
return r;
@@ -503,18 +497,18 @@ failure:
/**
* acp_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int acp_hw_fini(void *handle)
+static int acp_hw_fini(struct amdgpu_ip_block *ip_block)
{
u32 val = 0;
u32 count = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* return early if no ACP */
if (!adev->acp.acp_genpd) {
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
@@ -565,67 +559,50 @@ static int acp_hw_fini(void *handle)
return 0;
}
-static int acp_suspend(void *handle)
+static int acp_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power up on suspend */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0);
return 0;
}
-static int acp_resume(void *handle)
+static int acp_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* power down again on resume */
if (!adev->acp.acp_cell)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- return 0;
-}
-
-static int acp_early_init(void *handle)
-{
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0);
return 0;
}
-static bool acp_is_idle(void *handle)
+static bool acp_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int acp_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int acp_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int acp_set_clockgating_state(void *handle,
+static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int acp_set_powergating_state(void *handle,
+static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);
return 0;
}
static const struct amd_ip_funcs acp_ip_funcs = {
.name = "acp_ip",
- .early_init = acp_early_init,
- .late_init = NULL,
.sw_init = acp_sw_init,
.sw_fini = acp_sw_fini,
.hw_init = acp_hw_init,
@@ -633,12 +610,8 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.suspend = acp_suspend,
.resume = acp_resume,
.is_idle = acp_is_idle,
- .wait_for_idle = acp_wait_for_idle,
- .soft_reset = acp_soft_reset,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version acp_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 1f5a296f5ed2..f5466c592d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -172,8 +172,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
&buffer);
obj = (union acpi_object *)buffer.pointer;
- /* Fail if calling the method fails and ATIF is supported */
- if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ /* Fail if calling the method fails */
+ if (ACPI_FAILURE(status)) {
DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n",
acpi_format_exception(status));
kfree(obj);
@@ -394,6 +394,10 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
characteristics.max_input_signal;
atif->backlight_caps.ac_level = characteristics.ac_level;
atif->backlight_caps.dc_level = characteristics.dc_level;
+ atif->backlight_caps.data_points = characteristics.number_of_points;
+ memcpy(atif->backlight_caps.luminance_data,
+ characteristics.data_points,
+ sizeof(atif->backlight_caps.luminance_data));
out:
kfree(info);
return err;
@@ -800,6 +804,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev,
return -EIO;
}
+ kfree(info);
return 0;
}
@@ -1276,11 +1281,7 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
- caps->caps_valid = atif->backlight_caps.caps_valid;
- caps->min_input_signal = atif->backlight_caps.min_input_signal;
- caps->max_input_signal = atif->backlight_caps.max_input_signal;
- caps->ac_level = atif->backlight_caps.ac_level;
- caps->dc_level = atif->backlight_caps.dc_level;
+ memcpy(caps, &atif->backlight_caps, sizeof(*caps));
}
/**
@@ -1531,23 +1532,35 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev)
return true;
#endif /* CONFIG_AMD_PMC */
}
+#endif /* CONFIG_SUSPEND */
-/**
- * amdgpu_choose_low_power_state
- *
- * @adev: amdgpu_device_pointer
- *
- * Choose the target low power state for the GPU
- */
-void amdgpu_choose_low_power_state(struct amdgpu_device *adev)
-{
- if (adev->in_runpm)
- return;
+#if IS_ENABLED(CONFIG_DRM_AMD_ISP)
+static const struct acpi_device_id isp_sensor_ids[] = {
+ { "OMNI5C10" },
+ { }
+};
- if (amdgpu_acpi_is_s0ix_active(adev))
- adev->in_s0ix = true;
- else if (amdgpu_acpi_is_s3_active(adev))
- adev->in_s3 = true;
+static int isp_match_acpi_device_ids(struct device *dev, const void *data)
+{
+ return acpi_match_device(data, dev) ? 1 : 0;
}
-#endif /* CONFIG_SUSPEND */
+int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN])
+{
+ struct device *pdev __free(put_device) = NULL;
+ struct acpi_device *acpi_pdev;
+
+ pdev = bus_find_device(&platform_bus_type, NULL, isp_sensor_ids,
+ isp_match_acpi_device_ids);
+ if (!pdev)
+ return -EINVAL;
+
+ acpi_pdev = ACPI_COMPANION(pdev);
+ if (!acpi_pdev)
+ return -ENODEV;
+
+ strscpy(*hid, acpi_device_hid(acpi_pdev));
+
+ return 0;
+}
+#endif /* CONFIG_DRM_AMD_ISP */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4f08b153cb66..d8ac4b1051a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -368,7 +368,10 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;
- amdgpu_bo_reserve(*bo, true);
+ if (!bo || !*bo)
+ return;
+
+ (void)amdgpu_bo_reserve(*bo, true);
amdgpu_bo_kunmap(*bo);
amdgpu_bo_unpin(*bo);
amdgpu_bo_unreserve(*bo);
@@ -459,7 +462,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
else
mem_info->local_mem_size_private =
KFD_XCP_MEMORY_SIZE(adev, xcp->id);
- } else if (adev->flags & AMD_IS_APU) {
+ } else if (adev->apu_prefer_gtt) {
mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT);
mem_info->local_mem_size_private = 0;
} else {
@@ -555,48 +558,6 @@ out_put:
return r;
}
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
- struct amdgpu_device *src)
-{
- struct amdgpu_device *peer_adev = src;
- struct amdgpu_device *adev = dst;
- int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
-
- if (ret < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, ret);
- ret = 0;
- }
- return (uint8_t)ret;
-}
-
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
- struct amdgpu_device *src,
- bool is_min)
-{
- struct amdgpu_device *adev = dst, *peer_adev;
- int num_links;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))
- return 0;
-
- if (src)
- peer_adev = src;
-
- /* num links returns 0 for indirect peers since indirect route is unknown. */
- num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev);
- if (num_links < 0) {
- DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
- adev->gmc.xgmi.physical_node_id,
- peer_adev->gmc.xgmi.physical_node_id, num_links);
- num_links = 0;
- }
-
- /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */
- return (num_links * 16 * 25000)/BITS_PER_BYTE;
-}
-
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min)
{
int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) :
@@ -715,8 +676,9 @@ err:
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
{
enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE;
- if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
- ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) {
+ if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 &&
+ ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) ||
+ (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 12)) {
pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
amdgpu_gfx_off_ctrl(adev, idle);
} else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) &&
@@ -724,7 +686,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
/* Disable GFXOFF and PG. Temporary workaround
* to fix some compute applications issue on GFX9.
*/
- adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state);
+ struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+ if (gfx_block != NULL)
+ gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state);
}
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE,
@@ -815,7 +779,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
}
do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
return ALIGN_DOWN(tmp, PAGE_SIZE);
- } else if (adev->flags & AMD_IS_APU) {
+ } else if (adev->apu_prefer_gtt) {
return (ttm_tt_pages_limit() << PAGE_SHIFT);
} else {
return adev->gmc.real_vram_size;
@@ -834,6 +798,9 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL);
if (!ring_funcs)
return -ENOMEM;
@@ -858,8 +825,14 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0);
- if (kiq_ring->sched.ready && !adev->job_hang)
- r = amdgpu_ring_test_helper(kiq_ring);
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
+ r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
@@ -889,3 +862,27 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
return kgd2kfd_start_sched(adev->kfd.dev, node_id);
}
+
+/* check if there are KFD queues active */
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id)
+{
+ if (!adev->kfd.init_complete)
+ return false;
+
+ return kgd2kfd_compute_active(adev->kfd.dev, node_id);
+}
+
+/* Config CGTT_SQ_CLK_CTRL */
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable)
+{
+ int r;
+
+ if (!adev->kfd.init_complete)
+ return 0;
+
+ r = psp_config_sq_perfmon(&adev->psp, xcp_id, core_override_enable,
+ reg_override_enable, perfmon_override_enable);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f9d119448442..b6ca41859b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
};
struct amdgpu_device;
+struct kfd_process_device;
struct amdgpu_reset_context;
enum kfd_mem_attachment_type {
@@ -192,7 +193,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo);
int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
unsigned long cur_seq, struct kgd_mem *mem);
int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
@@ -212,9 +213,8 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
}
static inline
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- return 0;
}
static inline
@@ -254,11 +254,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
uint64_t *bo_size, void *metadata_buffer,
size_t buffer_size, uint32_t *metadata_size,
uint32_t *flags, int8_t *xcp_id);
-uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst,
- struct amdgpu_device *src);
-int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst,
- struct amdgpu_device *src,
- bool is_min);
int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min);
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
@@ -266,6 +261,10 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
u32 inst);
int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
+int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id);
+
/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
@@ -295,14 +294,10 @@ int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);
(&((struct amdgpu_fpriv *) \
((struct drm_file *)(drm_priv))->driver_priv)->vm)
-int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct amdgpu_vm *avm,
void **process_info,
struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
- void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
uint8_t xcp_id);
@@ -428,6 +423,10 @@ int kgd2kfd_check_and_lock_kfd(void);
void kgd2kfd_unlock_kfd(void);
int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
+bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault);
+
#else
static inline int kgd2kfd_init(void)
{
@@ -508,5 +507,17 @@ static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
{
return 0;
}
+
+static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
+{
+ return false;
+}
+
+static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
+ bool retry_fault)
+{
+ return false;
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 8dfdb18197c4..7e9f7a280c1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -189,8 +189,9 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 9435af2e6bdc..ffbaa8bc5eea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -299,7 +299,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
if (r)
goto out;
} else {
- drm_sched_start(&ring->sched);
+ drm_sched_start(&ring->sched, 0);
}
}
@@ -415,9 +415,10 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v9_hqd_reset
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index e2ae714a700f..89a45a9218f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -509,6 +509,17 @@ static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev,
return 0;
}
+static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ uint32_t reg_offset = get_sdma_rlc_reg_offset(adev, engine, queue);
+ uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset);
+ uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset);
+ bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED);
+
+ return is_active ? doorbell_off >> 2 : 0;
+}
+
const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping,
@@ -530,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings,
- .build_grace_period_packet_info =
- kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info =
+ kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
@@ -543,5 +554,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
.clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v9_hqd_reset
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 62176d607bef..04ef0ca10541 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannont handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
@@ -1084,6 +1084,12 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
return 0;
}
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
@@ -1109,8 +1115,9 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v10_hqd_reset
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index 9efd2dd4fdd7..a4c607c88178 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
@@ -65,3 +66,5 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
uint32_t queue_id,
uint32_t inst,
unsigned int utimeout);
+uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index c718bedda0ca..6d08bc2781a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
@@ -682,5 +682,6 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v10_hqd_reset
+ .hqd_reset = kgd_gfx_v10_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index a4ba49cb22db..e0e6a6a49d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -800,6 +800,12 @@ static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
return 0;
}
+static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
@@ -824,5 +830,6 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.set_address_watch = kgd_gfx_v11_set_address_watch,
.clear_address_watch = kgd_gfx_v11_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v11_hqd_reset
+ .hqd_reset = kgd_gfx_v11_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
index 0dfe7093bd8a..6f0dc23c901b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c
@@ -361,6 +361,12 @@ static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
return 0;
}
+static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
.init_interrupts = init_interrupts_v12,
.hqd_dump = hqd_dump_v12,
@@ -374,4 +380,5 @@ const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
.set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v12_set_address_watch,
.clear_address_watch = kgd_gfx_v12_clear_address_watch,
+ .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 3bc0cbf45bc5..088d09cc7a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -944,9 +944,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
*
* @adev: Handle of device whose registers are to be read
* @queue_idx: Index of queue in the queue-map bit-field
- * @wave_cnt: Output parameter updated with number of waves in flight
- * @vmid: Output parameter updated with VMID of queue whose wave count
- * is being collected
+ * @queue_cnt: Stores the wave count and doorbell offset for an active queue
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
@@ -1079,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
adev->gfx.cu_info.max_waves_per_simd;
}
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;
- /*
- * The CP cannot handle a 0 grace period input and will result in
- * an infinite grace period being set so set to 1 to prevent this.
- */
- if (grace_period == 0)
- grace_period = 1;
-
- *reg_data = REG_SET_FIELD(*reg_data,
- CP_IQ_WAIT_TIME2,
- SCH_WAVE,
- grace_period);
+ if (sch_wave)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ SCH_WAVE,
+ sch_wave);
+ if (que_sleep)
+ *reg_data = REG_SET_FIELD(*reg_data,
+ CP_IQ_WAIT_TIME2,
+ QUE_SLEEP,
+ que_sleep);
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
@@ -1133,10 +1131,6 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
uint32_t low, high;
uint64_t queue_addr = 0;
- if (!adev->debug_exp_resets &&
- !adev->gfx.num_gfx_rings)
- return 0;
-
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
@@ -1229,6 +1223,13 @@ unlock_out:
return queue_addr;
}
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue)
+
+{
+ return 0;
+}
+
const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -1254,9 +1255,10 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
- .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+ .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
- .hqd_reset = kgd_gfx_v9_hqd_reset
+ .hqd_reset = kgd_gfx_v9_hqd_reset,
+ .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index b6a91a552aa4..704452ca62f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
- uint32_t grace_period,
+ uint32_t sch_wave,
+ uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
@@ -111,3 +112,5 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
uint32_t queue_id,
uint32_t inst,
unsigned int utimeout);
+uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev,
+ int engine, int queue);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fa572ba7f9fc..260165bbe373 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -197,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
return -EINVAL;
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
- if (adev->flags & AMD_IS_APU) {
+ if (adev->apu_prefer_gtt) {
system_mem_needed = size;
ttm_mem_needed = size;
}
@@ -234,7 +234,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
if (adev && xcp_id >= 0) {
adev->kfd.vram_used[xcp_id] += vram_needed;
adev->kfd.vram_used_aligned[xcp_id] +=
- (adev->flags & AMD_IS_APU) ?
+ adev->apu_prefer_gtt ?
vram_needed :
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
}
@@ -262,7 +262,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
if (adev) {
adev->kfd.vram_used[xcp_id] -= size;
- if (adev->flags & AMD_IS_APU) {
+ if (adev->apu_prefer_gtt) {
adev->kfd.vram_used_aligned[xcp_id] -= size;
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
@@ -370,40 +370,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
return 0;
}
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+/**
+ * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences
+ * @bo: the BO where to remove the evictions fences from.
+ *
+ * This functions should only be used on release when all references to the BO
+ * are already dropped. We remove the eviction fence from the private copy of
+ * the dma_resv object here since that is what is used during release to
+ * determine of the BO is idle or not.
+ */
+void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo)
{
- struct amdgpu_bo *root = bo;
- struct amdgpu_vm_bo_base *vm_bo;
- struct amdgpu_vm *vm;
- struct amdkfd_process_info *info;
- struct amdgpu_amdkfd_fence *ef;
- int ret;
-
- /* we can always get vm_bo from root PD bo.*/
- while (root->parent)
- root = root->parent;
+ struct dma_resv *resv = &bo->tbo.base._resv;
+ struct dma_fence *fence, *stub;
+ struct dma_resv_iter cursor;
- vm_bo = root->vm_bo;
- if (!vm_bo)
- return 0;
+ dma_resv_assert_held(resv);
- vm = vm_bo->vm;
- if (!vm)
- return 0;
-
- info = vm->process_info;
- if (!info || !info->eviction_fence)
- return 0;
-
- ef = container_of(dma_fence_get(&info->eviction_fence->base),
- struct amdgpu_amdkfd_fence, base);
-
- BUG_ON(!dma_resv_trylock(bo->tbo.base.resv));
- ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
- dma_resv_unlock(bo->tbo.base.resv);
+ stub = dma_fence_get_stub();
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
+ if (!to_amdgpu_amdkfd_fence(fence))
+ continue;
- dma_fence_put(&ef->base);
- return ret;
+ dma_resv_replace_fences(resv, fence->context, stub,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+ dma_fence_put(stub);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
@@ -499,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -603,12 +595,6 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{
struct ttm_operation_ctx ctx = {.interruptible = true};
struct amdgpu_bo *bo = attachment->bo_va->base.bo;
- int ret;
-
- amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (ret)
- return ret;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -730,7 +716,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
return;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
@@ -779,7 +765,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
}
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
- ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -890,7 +876,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device
*/
- if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
+ if ((adev != bo_adev && !adev->apu_prefer_gtt) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -989,7 +975,7 @@ unwind:
if (!attachment[i])
continue;
if (attachment[i]->bo_va) {
- amdgpu_bo_reserve(bo[i], true);
+ (void)amdgpu_bo_reserve(bo[i], true);
if (--attachment[i]->bo_va->ref_count == 0)
amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
amdgpu_bo_unreserve(bo[i]);
@@ -1259,11 +1245,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
return -EBUSY;
}
- amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+ (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
- amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+ (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
return 0;
}
@@ -1287,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -1529,27 +1515,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
amdgpu_bo_unreserve(bo);
}
-int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
- struct amdgpu_vm *avm, u32 pasid)
-
-{
- int ret;
-
- /* Free the original amdgpu allocated pasid,
- * will be replaced with kfd allocated pasid.
- */
- if (avm->pasid) {
- amdgpu_pasid_free(avm->pasid);
- amdgpu_vm_set_pasid(adev, avm, 0);
- }
-
- ret = amdgpu_vm_set_pasid(adev, avm, pasid);
- if (ret)
- return ret;
-
- return 0;
-}
-
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
struct amdgpu_vm *avm,
void **process_info,
@@ -1607,27 +1572,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
- void *drm_priv)
-{
- struct amdgpu_vm *avm;
-
- if (WARN_ON(!adev || !drm_priv))
- return;
-
- avm = drm_priv_to_vm(drm_priv);
-
- pr_debug("Releasing process vm %p\n", avm);
-
- /* The original pasid of amdgpu vm has already been
- * released during making a amdgpu vm to a compute vm
- * The current pasid is managed by kfd and will be
- * released on kfd process destroy. Set amdgpu pasid
- * to 0 to avoid duplicate release.
- */
- amdgpu_vm_release_compute(adev, avm);
-}
-
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1688,7 +1632,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
- reserved_for_pt
- reserved_for_ras;
- if (adev->flags & AMD_IS_APU) {
+ if (adev->apu_prefer_gtt) {
system_mem_available = no_system_mem_limit ?
kfd_mem_limit.max_system_mem_limit :
kfd_mem_limit.max_system_mem_limit -
@@ -1736,7 +1680,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- if (adev->flags & AMD_IS_APU) {
+ if (adev->apu_prefer_gtt) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1987,7 +1931,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (size) {
if (!is_imported &&
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- ((adev->flags & AMD_IS_APU) &&
+ (adev->apu_prefer_gtt &&
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size;
else
@@ -2352,7 +2296,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
- amdgpu_bo_reserve(bo, true);
+ (void)amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
@@ -2414,7 +2358,7 @@ static int import_obj_create(struct amdgpu_device *adev,
(*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
- !(adev->flags & AMD_IS_APU) ?
+ !adev->apu_prefer_gtt ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0;
@@ -2524,11 +2468,14 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
/* First eviction, stop the queues */
r = kgd2kfd_quiesce_mm(mni->mm,
KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
- if (r)
+
+ if (r && r != -ESRCH)
pr_err("Failed to quiesce KFD\n");
- queue_delayed_work(system_freezable_wq,
- &process_info->restore_userptr_work,
- msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ if (r != -ESRCH)
+ queue_delayed_work(system_freezable_wq,
+ &process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
}
mutex_unlock(&process_info->notifier_lock);
@@ -2612,6 +2559,18 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
if (ret != -EFAULT)
return ret;
+ /* If applications unmap memory before destroying the userptr
+ * from the KFD, trigger a segmentation fault in VM debug mode.
+ */
+ if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
+ pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
+ pid_nr(process_info->pid), mem->va);
+
+ // Send GPU VM fault to user space
+ kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
+ mem->va);
+ }
+
ret = 0;
}
@@ -2966,7 +2925,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
- ret = amdgpu_sync_fence(&sync_obj, fence);
+ ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 0c8975ac5af9..e476e45b996a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -36,13 +36,6 @@
#include "atombios_encoders.h"
#include "bif/bif_4_1_d.h"
-static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev,
- ATOM_GPIO_I2C_ASSIGMENT *gpio,
- u8 index)
-{
-
-}
-
static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio)
{
struct amdgpu_i2c_bus_rec i2c;
@@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
-
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
if (gpio->sucI2cId.ucAccess == id) {
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
break;
@@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
gpio = &i2c_info->asGPIO_Info[0];
for (i = 0; i < num_indices; i++) {
- amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i);
-
i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
if (i2c.valid) {
@@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev)
}
}
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+ ATOM_GPIO_I2C_ASSIGMENT *gpio;
+ struct amdgpu_i2c_bus_rec i2c;
+ int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info);
+ struct _ATOM_GPIO_I2C_INFO *i2c_info;
+ uint16_t data_offset, size;
+ int i, num_indices;
+ char stmp[32];
+
+ if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) {
+ i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset);
+
+ num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
+ sizeof(ATOM_GPIO_I2C_ASSIGMENT);
+
+ gpio = &i2c_info->asGPIO_Info[0];
+ for (i = 0; i < num_indices; i++) {
+ i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio);
+
+ if (i2c.valid && i2c.i2c_id == i2c_id) {
+ sprintf(stmp, "OEM 0x%x", i2c.i2c_id);
+ adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp);
+ break;
+ }
+ gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+ ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
+ }
+ }
+}
+
struct amdgpu_gpio_rec
amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
u8 id)
@@ -1145,8 +1165,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock)
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock)
{
SET_ENGINE_CLOCK_PS_ALLOCATION args;
int index = GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings);
@@ -1161,8 +1181,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
if (mem_clock)
args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK);
- amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args,
- sizeof(args));
+ return amdgpu_atom_execute_table(adev->mode_info.atom_context, index,
+ (uint32_t *)&args, sizeof(args));
}
void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 0811474e8fd3..867bc5c5ce67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev,
struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev,
uint8_t id);
void amdgpu_atombios_i2c_init(struct amdgpu_device *adev);
+void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id);
bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev);
@@ -163,8 +164,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev,
bool strobe_mode,
struct atom_mpll_param *mpll_param);
-void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
- u32 eng_clock, u32 mem_clock);
+int amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev,
+ u32 eng_clock, u32 mem_clock);
bool
amdgpu_atombios_is_voltage_gpio(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f873dd3cae16..c7d32fb216e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -281,6 +281,9 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR6:
vram_type = AMDGPU_VRAM_TYPE_GDDR6;
break;
+ case ATOM_DGPU_VRAM_TYPE_HBM3E:
+ vram_type = AMDGPU_VRAM_TYPE_HBM3E;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -549,9 +552,10 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
u16 data_offset, size;
union umc_info *umc_info;
u8 frev, crev;
- bool ecc_default_enabled = false;
+ bool mem_ecc_enabled = false;
u8 umc_config;
u32 umc_config1;
+ adev->ras_default_ecc_enabled = false;
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
umc_info);
@@ -563,20 +567,22 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
switch (crev) {
case 1:
umc_config = le32_to_cpu(umc_info->v31.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 2:
umc_config = le32_to_cpu(umc_info->v32.umc_config);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
case 3:
umc_config = le32_to_cpu(umc_info->v33.umc_config);
umc_config1 = le32_to_cpu(umc_info->v33.umc_config1);
- ecc_default_enabled =
+ mem_ecc_enabled =
((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ||
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -585,9 +591,12 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
} else if (frev == 4) {
switch (crev) {
case 0:
+ umc_config = le32_to_cpu(umc_info->v40.umc_config);
umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
- ecc_default_enabled =
+ mem_ecc_enabled =
(umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ adev->ras_default_ecc_enabled =
+ (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false;
break;
default:
/* unsupported crev */
@@ -599,7 +608,7 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
}
}
- return ecc_default_enabled;
+ return mem_ecc_enabled;
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 375f02002579..3893e6fc2f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -89,18 +89,6 @@ bool amdgpu_is_atpx_hybrid(void)
return amdgpu_atpx_priv.atpx.is_hybrid;
}
-bool amdgpu_atpx_dgpu_req_power_for_displays(void)
-{
- return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
-}
-
-#if defined(CONFIG_ACPI)
-void *amdgpu_atpx_get_dhandle(void)
-{
- return amdgpu_atpx_priv.dhandle;
-}
-#endif
-
/**
* amdgpu_atpx_call - call an ATPX method
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 45affc02548c..00e96419fcda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -47,41 +47,50 @@
/* Check if current bios is an ATOM BIOS.
* Return true if it is ATOM BIOS. Otherwise, return false.
*/
-static bool check_atom_bios(uint8_t *bios, size_t size)
+static bool check_atom_bios(struct amdgpu_device *adev, size_t size)
{
uint16_t tmp, bios_header_start;
+ uint8_t *bios = adev->bios;
if (!bios || size < 0x49) {
- DRM_INFO("vbios mem is null or mem size is wrong\n");
+ dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n");
return false;
}
if (!AMD_IS_VALID_VBIOS(bios)) {
- DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]);
+ dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0],
+ bios[1]);
return false;
}
bios_header_start = bios[0x48] | (bios[0x49] << 8);
if (!bios_header_start) {
- DRM_INFO("Can't locate bios header\n");
+ dev_dbg(adev->dev, "Can't locate VBIOS header\n");
return false;
}
tmp = bios_header_start + 4;
if (size < tmp) {
- DRM_INFO("BIOS header is broken\n");
+ dev_dbg(adev->dev, "VBIOS header is broken\n");
return false;
}
if (!memcmp(bios + tmp, "ATOM", 4) ||
!memcmp(bios + tmp, "MOTA", 4)) {
- DRM_DEBUG("ATOMBIOS detected\n");
+ dev_dbg(adev->dev, "ATOMBIOS detected\n");
return true;
}
return false;
}
+void amdgpu_bios_release(struct amdgpu_device *adev)
+{
+ kfree(adev->bios);
+ adev->bios = NULL;
+ adev->bios_size = 0;
+}
+
/* If you boot an IGP board with a discrete card as the primary,
* the IGP rom is not accessible via the rom bar as the IGP rom is
* part of the system bios. On boot, the system bios puts a
@@ -118,8 +127,8 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
iounmap(bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -146,8 +155,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, size);
pci_unmap_rom(adev->pdev, bios);
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -186,8 +195,8 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev)
/* read complete BIOS */
amdgpu_asic_read_bios_from_rom(adev, adev->bios, len);
- if (!check_atom_bios(adev->bios, len)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, len)) {
+ amdgpu_bios_release(adev);
return false;
}
@@ -216,14 +225,15 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev)
memcpy_fromio(adev->bios, bios, romlen);
iounmap(bios);
- if (!check_atom_bios(adev->bios, romlen))
+ if (!check_atom_bios(adev, romlen))
goto free_bios;
adev->bios_size = romlen;
return true;
free_bios:
- kfree(adev->bios);
+ amdgpu_bios_release(adev);
+
return false;
}
@@ -324,8 +334,8 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
break;
}
- if (!check_atom_bios(adev->bios, size)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, size)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = size;
@@ -389,8 +399,8 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
vhdr->ImageLength,
GFP_KERNEL);
- if (!check_atom_bios(adev->bios, vhdr->ImageLength)) {
- kfree(adev->bios);
+ if (!check_atom_bios(adev, vhdr->ImageLength)) {
+ amdgpu_bios_release(adev);
return false;
}
adev->bios_size = vhdr->ImageLength;
@@ -437,6 +447,13 @@ success:
return true;
}
+static bool amdgpu_prefer_rom_resource(struct amdgpu_device *adev)
+{
+ struct resource *res = &adev->pdev->resource[PCI_ROM_RESOURCE];
+
+ return (res->flags & IORESOURCE_ROM_SHADOW);
+}
+
static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
@@ -455,14 +472,27 @@ static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
goto success;
}
- if (amdgpu_read_platform_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from platform\n");
- goto success;
- }
+ if (amdgpu_prefer_rom_resource(adev)) {
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
- if (amdgpu_read_bios(adev)) {
- dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
- goto success;
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ } else {
+ if (amdgpu_read_platform_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from platform\n");
+ goto success;
+ }
+
+ if (amdgpu_read_bios(adev)) {
+ dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
+ goto success;
+ }
}
if (amdgpu_read_bios_from_rom(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 16153d275d7a..004a6a9d6b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -252,83 +252,22 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
if (!adev->pm.fw) {
switch (adev->asic_type) {
- case CHIP_TAHITI:
- strcpy(fw_name, "radeon/tahiti_smc.bin");
- break;
- case CHIP_PITCAIRN:
- if ((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6810) ||
- (adev->pdev->device == 0x6811))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/pitcairn_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/pitcairn_smc.bin");
- }
- break;
- case CHIP_VERDE:
- if (((adev->pdev->device == 0x6820) &&
- ((adev->pdev->revision == 0x81) ||
- (adev->pdev->revision == 0x83))) ||
- ((adev->pdev->device == 0x6821) &&
- ((adev->pdev->revision == 0x83) ||
- (adev->pdev->revision == 0x87))) ||
- ((adev->pdev->revision == 0x87) &&
- ((adev->pdev->device == 0x6823) ||
- (adev->pdev->device == 0x682b)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/verde_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/verde_smc.bin");
- }
- break;
- case CHIP_OLAND:
- if (((adev->pdev->revision == 0x81) &&
- ((adev->pdev->device == 0x6600) ||
- (adev->pdev->device == 0x6604) ||
- (adev->pdev->device == 0x6605) ||
- (adev->pdev->device == 0x6610))) ||
- ((adev->pdev->revision == 0x83) &&
- (adev->pdev->device == 0x6610))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/oland_k_smc.bin");
- } else {
- strcpy(fw_name, "radeon/oland_smc.bin");
- }
- break;
- case CHIP_HAINAN:
- if (((adev->pdev->revision == 0x81) &&
- (adev->pdev->device == 0x6660)) ||
- ((adev->pdev->revision == 0x83) &&
- ((adev->pdev->device == 0x6660) ||
- (adev->pdev->device == 0x6663) ||
- (adev->pdev->device == 0x6665) ||
- (adev->pdev->device == 0x6667)))) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/hainan_k_smc.bin");
- } else if ((adev->pdev->revision == 0xc3) &&
- (adev->pdev->device == 0x6665)) {
- info->is_kicker = true;
- strcpy(fw_name, "radeon/banks_k_2_smc.bin");
- } else {
- strcpy(fw_name, "radeon/hainan_smc.bin");
- }
- break;
case CHIP_BONAIRE:
if ((adev->pdev->revision == 0x80) ||
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/bonaire_smc.bin");
+ strscpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/hawaii_smc.bin");
+ strscpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -338,83 +277,85 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) ||
((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/topaz_smc.bin");
+ strscpy(fw_name, "amdgpu/topaz_smc.bin");
break;
case CHIP_TONGA:
if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_k_smc.bin");
} else
- strcpy(fw_name, "amdgpu/tonga_smc.bin");
+ strscpy(fw_name, "amdgpu/tonga_smc.bin");
break;
case CHIP_FIJI:
- strcpy(fw_name, "amdgpu/fiji_smc.bin");
+ strscpy(fw_name, "amdgpu/fiji_smc.bin");
break;
case CHIP_POLARIS11:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k_smc.bin");
} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
}
break;
case CHIP_POLARIS10:
if (type == CGS_UCODE_ID_SMU) {
if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k_smc.bin");
} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_k2_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc.bin");
}
} else if (type == CGS_UCODE_ID_SMU_SK) {
- strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+ strscpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
}
break;
case CHIP_POLARIS12:
if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {
info->is_kicker = true;
- strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_k_smc.bin");
} else {
- strcpy(fw_name, "amdgpu/polaris12_smc.bin");
+ strscpy(fw_name, "amdgpu/polaris12_smc.bin");
}
break;
case CHIP_VEGAM:
- strcpy(fw_name, "amdgpu/vegam_smc.bin");
+ strscpy(fw_name, "amdgpu/vegam_smc.bin");
break;
case CHIP_VEGA10:
if ((adev->pdev->device == 0x687f) &&
((adev->pdev->revision == 0xc0) ||
(adev->pdev->revision == 0xc1) ||
(adev->pdev->revision == 0xc3)))
- strcpy(fw_name, "amdgpu/vega10_acg_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_acg_smc.bin");
else
- strcpy(fw_name, "amdgpu/vega10_smc.bin");
+ strscpy(fw_name, "amdgpu/vega10_smc.bin");
break;
case CHIP_VEGA12:
- strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ strscpy(fw_name, "amdgpu/vega12_smc.bin");
break;
case CHIP_VEGA20:
- strcpy(fw_name, "amdgpu/vega20_smc.bin");
+ strscpy(fw_name, "amdgpu/vega20_smc.bin");
break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name);
+ err = amdgpu_ucode_request(adev, &adev->pm.fw,
+ AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (err) {
DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
amdgpu_ucode_release(&adev->pm.fw);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 344e0a9ee08a..5e375e9c4f5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -674,7 +674,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -839,7 +839,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -1196,7 +1196,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
}
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -1464,7 +1464,7 @@ out:
}
static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
new file mode 100644
index 000000000000..5a234eadae8b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c
@@ -0,0 +1,581 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+
+static const guid_t MCE = CPER_NOTIFY_MCE;
+static const guid_t CMC = CPER_NOTIFY_CMC;
+static const guid_t BOOT = BOOT_TYPE;
+
+static const guid_t CRASHDUMP = AMD_CRASHDUMP;
+static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;
+
+static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
+{
+ hdr->record_length += size;
+}
+
+static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
+{
+ struct tm tm;
+ time64_t now = ktime_get_real_seconds();
+
+ time64_to_tm(now, 0, &tm);
+ timestamp->seconds = tm.tm_sec;
+ timestamp->minutes = tm.tm_min;
+ timestamp->hours = tm.tm_hour;
+ timestamp->flag = 0;
+ timestamp->day = tm.tm_mday;
+ timestamp->month = 1 + tm.tm_mon;
+ timestamp->year = (1900 + tm.tm_year) % 100;
+ timestamp->century = (1900 + tm.tm_year) / 100;
+}
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev)
+{
+ char record_id[16];
+
+ hdr->signature[0] = 'C';
+ hdr->signature[1] = 'P';
+ hdr->signature[2] = 'E';
+ hdr->signature[3] = 'R';
+ hdr->revision = CPER_HDR_REV_1;
+ hdr->signature_end = 0xFFFFFFFF;
+ hdr->error_severity = sev;
+
+ hdr->valid_bits.platform_id = 1;
+ hdr->valid_bits.partition_id = 1;
+ hdr->valid_bits.timestamp = 1;
+
+ amdgpu_cper_get_timestamp(&hdr->timestamp);
+
+ snprintf(record_id, 9, "%d:%X",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0,
+ atomic_inc_return(&adev->cper.unique_id));
+ memcpy(hdr->record_id, record_id, 8);
+
+ snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
+ adev->pdev->vendor, adev->pdev->device);
+ /* pmfw version should be part of creator_id according to CPER spec */
+ snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_BOOT:
+ hdr->notify_type = BOOT;
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ hdr->notify_type = MCE;
+ break;
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ if (sev == CPER_SEV_NON_FATAL_CORRECTED)
+ hdr->notify_type = CMC;
+ else
+ hdr->notify_type = MCE;
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type\n");
+ break;
+ }
+
+ __inc_entry_length(hdr, HDR_LEN);
+}
+
+static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
+ struct cper_sec_desc *section_desc,
+ bool bp_threshold,
+ bool poison,
+ enum cper_error_severity sev,
+ guid_t sec_type,
+ uint32_t section_length,
+ uint32_t section_offset)
+{
+ section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
+ section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
+ section_desc->sec_offset = section_offset;
+ section_desc->sec_length = section_length;
+ section_desc->valid_bits.fru_text = 1;
+ section_desc->flag_bits.primary = 1;
+ section_desc->severity = sev;
+ section_desc->sec_type = sec_type;
+
+ snprintf(section_desc->fru_text, 20, "OAM%d",
+ (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
+ adev->smuio.funcs->get_socket_id(adev) :
+ 0);
+
+ if (bp_threshold)
+ section_desc->flag_bits.exceed_err_threshold = 1;
+ if (poison)
+ section_desc->flag_bits.latent_err = 1;
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_crashdump_fatal *section;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
+ CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
+ FATAL_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->body.reg_arr_size = sizeof(reg_data);
+ section->body.data = reg_data;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+ bool poison;
+
+ poison = (sev == CPER_SEV_NON_FATAL_CORRECTED) ? false : true;
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
+ sev, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ reg_count = umin(reg_count, CPER_ACA_REG_COUNT);
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx)
+{
+ struct cper_sec_desc *section_desc;
+ struct cper_sec_nonstd_err *section;
+
+ section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
+ section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
+ CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN,
+ NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
+
+ section->hdr.valid_bits.err_info_cnt = 1;
+ section->hdr.valid_bits.err_context_cnt = 1;
+
+ section->info.error_type = RUNTIME;
+ section->info.ms_chk_bits.err_type_valid = 1;
+ section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
+ section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
+
+ /* Hardcoded Reg dump for bad page threshold CPER */
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
+ section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
+ section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
+ section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x96;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
+ section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;
+
+ __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);
+
+ return 0;
+}
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count)
+{
+ struct cper_hdr *hdr;
+ uint32_t size = 0;
+
+ size += HDR_LEN;
+ size += (SEC_DESC_LEN * section_count);
+
+ switch (type) {
+ case AMDGPU_CPER_TYPE_RUNTIME:
+ case AMDGPU_CPER_TYPE_BP_THRESHOLD:
+ size += (NONSTD_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_FATAL:
+ size += (FATAL_SEC_LEN * section_count);
+ break;
+ case AMDGPU_CPER_TYPE_BOOT:
+ size += (BOOT_SEC_LEN * section_count);
+ break;
+ default:
+ dev_err(adev->dev, "Unknown CPER Type!\n");
+ return NULL;
+ }
+
+ hdr = kzalloc(size, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ /* Save this early */
+ hdr->sec_cnt = section_count;
+
+ return hdr;
+}
+
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank)
+{
+ struct cper_hdr *fatal = NULL;
+ struct cper_sec_crashdump_reg_data reg_data = { 0 };
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
+ if (!fatal) {
+ dev_err(adev->dev, "fail to alloc cper entry for ue record\n");
+ return -ENOMEM;
+ }
+
+ reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
+ ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
+ kfree(fatal);
+
+ return 0;
+}
+
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
+{
+ struct cper_hdr *bp_threshold = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ int ret;
+
+ bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
+ if (!bp_threshold) {
+ dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n");
+ return -ENOMEM;
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM);
+ ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
+ if (ret)
+ return ret;
+
+ amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
+ kfree(bp_threshold);
+
+ return 0;
+}
+
+static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
+ enum aca_error_type aca_err_type)
+{
+ switch (aca_err_type) {
+ case ACA_ERROR_TYPE_UE:
+ return CPER_SEV_FATAL;
+ case ACA_ERROR_TYPE_CE:
+ return CPER_SEV_NON_FATAL_CORRECTED;
+ case ACA_ERROR_TYPE_DEFERRED:
+ return CPER_SEV_NON_FATAL_UNCORRECTED;
+ default:
+ dev_err(adev->dev, "Unknown ACA error type!\n");
+ return CPER_SEV_FATAL;
+ }
+}
+
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count)
+{
+ struct cper_hdr *corrected = NULL;
+ enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
+ struct aca_bank_node *node;
+ struct aca_bank *bank;
+ uint32_t i = 0;
+ int ret;
+
+ corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
+ if (!corrected) {
+ dev_err(adev->dev, "fail to allocate cper entry for ce records\n");
+ return -ENOMEM;
+ }
+
+ /* Raise severity if any DE is detected in the ACA bank list */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+ sev = CPER_SEV_NON_FATAL_UNCORRECTED;
+ break;
+ }
+ }
+
+ amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
+
+ /* Combine CE and DE in cper record */
+ list_for_each_entry(node, &banks->list, node) {
+ bank = &node->bank;
+ reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
+ reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
+ reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
+ reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
+ reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
+ reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
+ reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+ reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);
+
+ ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
+ amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
+ reg_data, CPER_ACA_REG_COUNT);
+ if (ret)
+ return ret;
+ }
+
+ amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
+ kfree(corrected);
+
+ return 0;
+}
+
+static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ return strcmp(chdr->signature, "CPER") ? false : true;
+}
+
+static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
+{
+ struct cper_hdr *chdr;
+ u64 p;
+ u32 chunk, rec_len = 0;
+
+ chdr = (struct cper_hdr *)&(ring->ring[pos]);
+ chunk = ring->ring_size - (pos << 2);
+
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = chdr->record_length;
+ goto calc;
+ }
+
+ /* ring buffer is not full, no cper data after ring->wptr */
+ if (ring->count_dw)
+ goto calc;
+
+ for (p = pos + 1; p <= ring->buf_mask; p++) {
+ chdr = (struct cper_hdr *)&(ring->ring[p]);
+ if (!strcmp(chdr->signature, "CPER")) {
+ rec_len = (p - pos) << 2;
+ goto calc;
+ }
+ }
+
+calc:
+ if (!rec_len)
+ return chunk;
+ else
+ return umin(rec_len, chunk);
+}
+
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
+{
+ u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+ int rec_cnt_dw = count >> 2;
+ u32 chunk, ent_sz;
+ u8 *s = (u8 *)src;
+
+ if (count >= ring->ring_size - 4) {
+ dev_err(ring->adev->dev,
+ "CPER data size(%d) is larger than ring size(%d)\n",
+ count, ring->ring_size - 4);
+
+ return;
+ }
+
+ wptr_old = ring->wptr;
+
+ mutex_lock(&ring->adev->cper.ring_lock);
+ while (count) {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
+ chunk = umin(ent_sz, count);
+
+ memcpy(&ring->ring[ring->wptr], s, chunk);
+
+ ring->wptr += (chunk >> 2);
+ ring->wptr &= ring->ptr_mask;
+ count -= chunk;
+ s += chunk;
+ }
+
+ if (ring->count_dw < rec_cnt_dw)
+ ring->count_dw = 0;
+
+ /* the buffer is overflow, adjust rptr */
+ if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
+ ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
+ ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
+ pos = (ring->wptr + 1) & ring->ptr_mask;
+
+ do {
+ ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);
+
+ rptr += (ent_sz >> 2);
+ rptr &= ring->ptr_mask;
+ *ring->rptr_cpu_addr = rptr;
+
+ pos = rptr;
+ } while (!amdgpu_cper_is_hdr(ring, rptr));
+ }
+
+ if (ring->count_dw >= rec_cnt_dw)
+ ring->count_dw -= rec_cnt_dw;
+ mutex_unlock(&ring->adev->cper.ring_lock);
+}
+
+static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ return *(ring->rptr_cpu_addr);
+}
+
+static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ return ring->wptr;
+}
+
+static const struct amdgpu_ring_funcs cper_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_CPER,
+ .align_mask = 0xff,
+ .support_64bit_ptrs = false,
+ .get_rptr = amdgpu_cper_ring_get_rptr,
+ .get_wptr = amdgpu_cper_ring_get_wptr,
+};
+
+static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &(adev->cper.ring_buf);
+
+ mutex_init(&adev->cper.ring_lock);
+
+ ring->adev = NULL;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = false;
+ ring->no_scheduler = true;
+ ring->funcs = &cper_ring_funcs;
+
+ sprintf(ring->name, "cper");
+ return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+}
+
+int amdgpu_cper_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ r = amdgpu_cper_ring_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r);
+ return r;
+ }
+
+ mutex_init(&adev->cper.cper_lock);
+
+ adev->cper.enabled = true;
+ adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;
+
+ return 0;
+}
+
+int amdgpu_cper_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
+ return 0;
+
+ adev->cper.enabled = false;
+
+ amdgpu_ring_fini(&(adev->cper.ring_buf));
+ adev->cper.count = 0;
+ adev->cper.wptr = 0;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
new file mode 100644
index 000000000000..bcb97d245673
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_CPER_H__
+#define __AMDGPU_CPER_H__
+
+#include "amd_cper.h"
+#include "amdgpu_aca.h"
+
+#define CPER_MAX_ALLOWED_COUNT 0x1000
+#define CPER_MAX_RING_SIZE 0X100000
+#define HDR_LEN (sizeof(struct cper_hdr))
+#define SEC_DESC_LEN (sizeof(struct cper_sec_desc))
+
+#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot))
+#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal))
+#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err))
+
+#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * idx))
+
+#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (BOOT_SEC_LEN * idx))
+#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (FATAL_SEC_LEN * idx))
+#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (NONSTD_SEC_LEN * idx))
+
+enum amdgpu_cper_type {
+ AMDGPU_CPER_TYPE_RUNTIME,
+ AMDGPU_CPER_TYPE_FATAL,
+ AMDGPU_CPER_TYPE_BOOT,
+ AMDGPU_CPER_TYPE_BP_THRESHOLD,
+};
+
+struct amdgpu_cper {
+ bool enabled;
+
+ atomic_t unique_id;
+ struct mutex cper_lock;
+
+ /* Lifetime CPERs generated */
+ uint32_t count;
+ uint32_t max_count;
+
+ uint32_t wptr;
+
+ void *ring[CPER_MAX_ALLOWED_COUNT];
+ struct amdgpu_ring ring_buf;
+ struct mutex ring_lock;
+};
+
+void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ enum amdgpu_cper_type type,
+ enum cper_error_severity sev);
+int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ struct cper_sec_crashdump_reg_data reg_data);
+int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t idx,
+ enum cper_error_severity sev,
+ uint32_t *reg_dump,
+ uint32_t reg_count);
+int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
+ struct cper_hdr *hdr,
+ uint32_t section_idx);
+
+struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
+ enum amdgpu_cper_type type,
+ uint16_t section_count);
+/* UE must be encoded into separated cper entries, 1 UE 1 cper */
+int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
+ struct aca_bank *bank);
+/* CEs and DEs are combined into 1 cper entry */
+int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
+ struct aca_banks *banks,
+ uint16_t bank_count);
+/* Bad page threshold is encoded into separated cper entry */
+int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev);
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
+ void *src, int count);
+int amdgpu_cper_init(struct amdgpu_device *adev);
+int amdgpu_cper_fini(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d891ab779ca7..9ea0d9b71f48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -296,7 +296,25 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
num_ibs[i], &p->jobs[i]);
if (ret)
goto free_all_kdata;
- p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id];
+ switch (p->adev->enforce_isolation[fpriv->xcp_id]) {
+ case AMDGPU_ENFORCE_ISOLATION_DISABLE:
+ default:
+ p->jobs[i]->enforce_isolation = false;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = true;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ case AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER:
+ p->jobs[i]->enforce_isolation = true;
+ p->jobs[i]->run_cleaner_shader = false;
+ break;
+ }
}
p->gang_leader = p->jobs[p->gang_leader_idx];
@@ -349,6 +367,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
ring = amdgpu_job_ring(job);
ib = &job->ibs[job->num_ibs++];
+ /* submissions to kernel queues are disabled */
+ if (ring->no_user_submission)
+ return -EINVAL;
+
/* MM engine doesn't support user fences */
if (p->uf_bo && ring->funcs->no_user_fence)
return -EINVAL;
@@ -428,7 +450,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
@@ -450,7 +472,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
return r;
}
@@ -1105,13 +1127,13 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
* We can't use gang submit on with reserved VMIDs when the VM changes
* can't be invalidated by more than one engine at the same time.
*/
- if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) {
+ if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
for (i = 0; i < p->gang_size; ++i) {
struct drm_sched_entity *entity = p->entities[i];
struct drm_gpu_scheduler *sched = entity->rq->sched;
struct amdgpu_ring *ring = to_amdgpu_ring(sched);
- if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
+ if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
}
}
@@ -1124,7 +1146,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
@@ -1135,7 +1158,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
@@ -1154,7 +1178,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+ GFP_KERNEL);
if (r)
return r;
}
@@ -1167,7 +1192,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;
@@ -1189,7 +1214,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (!bo)
continue;
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
}
}
@@ -1248,7 +1273,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
continue;
}
- r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+ GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
@@ -1801,13 +1827,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
return -EINVAL;
+ /* Make sure VRAM is allocated contigiously */
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
- for (i = 0; i < (*bo)->placement.num_placement; i++)
- (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
- r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
- if (r)
- return r;
+ if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
+ !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
+
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
+ for (i = 0; i < (*bo)->placement.num_placement; i++)
+ (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
+ r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
+ if (r)
+ return r;
+ }
return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index cfdf558b48b6..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c43d1b6e5d66..85567d0d9545 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -919,7 +919,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
return timeout;
}
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+static void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
@@ -949,19 +949,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
- struct amdgpu_ctx *ctx;
- struct idr *idp;
- uint32_t id;
-
amdgpu_ctx_mgr_entity_fini(mgr);
-
- idp = &mgr->ctx_handles;
-
- idr_for_each_entry(idp, ctx, id) {
- if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
- DRM_ERROR("ctx %p is still alive\n", ctx);
- }
-
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 85376baaa92f..090dfe86f75b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -92,7 +92,6 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
struct amdgpu_device *adev);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index cbef720de779..f81608330a3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz
int r;
uint32_t *data, x;
- if (size & 0x3 || *pos & 0x3)
+ if (size > 4096 || size & 0x3 || *pos & 0x3)
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -1648,7 +1648,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
- S_IFREG | 0444, root,
+ S_IFREG | 0400, root,
adev, debugfs_regs[i]);
if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
@@ -1902,7 +1902,7 @@ no_preempt:
continue;
}
job = to_amdgpu_job(s_job);
- if (preempted && (&job->hw_fence) == fence)
+ if (preempted && (&job->hw_fence.base) == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
@@ -1990,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
uint32_t max_freq, min_freq;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
- if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+ if (amdgpu_sriov_multi_vf_mode(adev))
return -EINVAL;
ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
@@ -2095,16 +2095,23 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
+ amdgpu_debugfs_vcn_sched_mask_init(adev);
+ amdgpu_debugfs_jpeg_sched_mask_init(adev);
+ amdgpu_debugfs_gfx_sched_mask_init(adev);
+ amdgpu_debugfs_compute_sched_mask_init(adev);
+ amdgpu_debugfs_sdma_sched_mask_init(adev);
+
amdgpu_ras_debugfs_create_all(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
+ amdgpu_psp_debugfs_init(adev);
- debugfs_create_file("amdgpu_evict_vram", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_vram", 0400, root, adev,
&amdgpu_evict_vram_fops);
- debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev,
+ debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev,
&amdgpu_evict_gtt_fops);
- debugfs_create_file("amdgpu_test_ib", 0444, root, adev,
+ debugfs_create_file("amdgpu_test_ib", 0400, root, adev,
&amdgpu_debugfs_test_ib_fops);
debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
&amdgpu_debugfs_vm_info_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 5ac59b62020c..7b50741dc097 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -203,6 +203,7 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
struct amdgpu_coredump_info *coredump = data;
struct drm_print_iterator iter;
struct amdgpu_vm_fault_info *fault_info;
+ struct amdgpu_ip_block *ip_block;
int ver;
iter.data = buffer;
@@ -282,13 +283,10 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
/* dump the ip state for each ip */
drm_printf(&p, "IP Dump\n");
for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
- if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) {
- drm_printf(&p, "IP: %s\n",
- coredump->adev->ip_blocks[i]
- .version->funcs->name);
- coredump->adev->ip_blocks[i]
- .version->funcs->print_ip_state(
- (void *)coredump->adev, &p);
+ ip_block = &coredump->adev->ip_blocks[i];
+ if (ip_block->version->funcs->print_ip_state) {
+ drm_printf(&p, "IP: %s\n", ip_block->version->funcs->name);
+ ip_block->version->funcs->print_ip_state(ip_block, &p);
drm_printf(&p, "\n");
}
}
@@ -345,11 +343,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
coredump->skip_vram_check = skip_vram_check;
coredump->reset_vram_lost = vram_lost;
- if (job && job->vm) {
- struct amdgpu_vm *vm = job->vm;
+ if (job && job->pasid) {
struct amdgpu_task_info *ti;
- ti = amdgpu_vm_get_task_info_vm(vm);
+ ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid);
if (ti) {
coredump->reset_task_info = *ti;
amdgpu_vm_put_task_info(ti);
@@ -367,5 +364,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+ drm_info(dev, "AMDGPU device coredump file has been created\n");
+ drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
+ dev->primary->index);
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c2394c8b4d6b..78f8755996f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,8 @@
* Alex Deucher
* Jerome Glisse
*/
+
+#include <linux/aperture.h>
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
@@ -35,10 +37,9 @@
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>
-#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
+#include <drm/drm_client_event.h>
#include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
@@ -84,6 +85,7 @@
#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
#endif
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
@@ -101,6 +103,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
+#define AMDGPU_VBIOS_SKIP (1U << 0)
+#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
+
static const struct drm_driver amdgpu_kms_driver;
const char *amdgpu_asic_name[] = {
@@ -144,15 +149,70 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
+#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
+/*
+ * Default init level where all blocks are expected to be initialized. This is
+ * the level of initialization expected by default and also after a full reset
+ * of the device.
+ */
+struct amdgpu_init_level amdgpu_init_default = {
+ .level = AMDGPU_INIT_LEVEL_DEFAULT,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+struct amdgpu_init_level amdgpu_init_recovery = {
+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
+/*
+ * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
+ * is used for cases like reset on initialization where the entire hive needs to
+ * be reset before first use.
+ */
+struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
+ .level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+ .hwini_ip_block_mask =
+ BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
+ BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
+ BIT(AMD_IP_BLOCK_TYPE_PSP)
+};
+
+static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
+ enum amd_ip_block_type block)
+{
+ return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
+}
+
+void amdgpu_set_init_level(struct amdgpu_device *adev,
+ enum amdgpu_init_lvl_id lvl)
+{
+ switch (lvl) {
+ case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
+ adev->init_lvl = &amdgpu_init_minimal_xgmi;
+ break;
+ case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
+ adev->init_lvl = &amdgpu_init_recovery;
+ break;
+ case AMDGPU_INIT_LEVEL_DEFAULT:
+ fallthrough;
+ default:
+ adev->init_lvl = &amdgpu_init_default;
+ break;
+ }
+}
+
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data);
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
- * of PCIe replays (NAKs)
+ * of PCIe replays (NAKs).
* The file pcie_replay_count is used for this and returns the total
- * number of replays as a sum of the NAKs generated and NAKs received
+ * number of replays as a sum of the NAKs generated and NAKs received.
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
@@ -168,8 +228,26 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ ret = sysfs_create_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+
+ return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev))
+ sysfs_remove_file(&adev->dev->kobj,
+ &dev_attr_pcie_replay_count.attr);
+}
+
static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
- struct bin_attribute *attr, char *buf,
+ const struct bin_attribute *attr, char *buf,
loff_t ppos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -205,8 +283,8 @@ static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
return bytes_read;
}
-BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
- AMDGPU_SYS_REG_STATE_END);
+static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
+ AMDGPU_SYS_REG_STATE_END);
int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
{
@@ -227,6 +305,42 @@ void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}
+int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->suspend) {
+ r = ip_block->version->funcs->suspend(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "suspend of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = false;
+ return 0;
+}
+
+int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (ip_block->version->funcs->resume) {
+ r = ip_block->version->funcs->resume(ip_block);
+ if (r) {
+ dev_err(ip_block->adev->dev,
+ "resume of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ return r;
+ }
+ }
+
+ ip_block->status.hw = true;
+ return 0;
+}
+
/**
* DOC: board_info
*
@@ -327,6 +441,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
+ return false;
+
if (adev->has_pr3 ||
((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
return true;
@@ -339,8 +456,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
* @dev: drm_device pointer
*
* Return:
- * 1 if the device supporte BACO;
- * 3 if the device support MACO (only works if BACO is supported)
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
* otherwise return 0.
*/
int amdgpu_device_supports_baco(struct drm_device *dev)
@@ -395,12 +512,13 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
break;
case CHIP_VEGA10:
/* enable BACO as runpm mode if noretry=0 */
- if (!adev->gmc.noretry)
+ if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
default:
/* enable BACO as runpm mode on CI+ */
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ if (!amdgpu_passthrough(adev))
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
}
@@ -487,7 +605,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
}
/**
- * amdgpu_device_aper_access - access vram by vram aperature
+ * amdgpu_device_aper_access - access vram by vram aperture
*
* @adev: amdgpu_device pointer
* @pos: offset of the buffer in vram
@@ -578,7 +696,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* here is that the GPU reset is not running on another thread in parallel.
*
* For this we trylock the read side of the reset semaphore, if that succeeds
- * we know that the reset is not running in paralell.
+ * we know that the reset is not running in parallel.
*
* If the trylock fails we assert that we are either already holding the read
* side of the lock or are the reset thread itself and hold the write side of
@@ -1294,6 +1412,17 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
BUG();
}
+static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
+{
+ if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
+ return AMDGPU_VBIOS_SKIP;
+
+ if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
+ return AMDGPU_VBIOS_OPTIONAL;
+
+ return 0;
+}
+
/**
* amdgpu_device_asic_init - Wrapper for atom asic_init
*
@@ -1303,17 +1432,28 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
*/
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
+ uint32_t flags;
+ bool optional;
int ret;
amdgpu_asic_pre_asic_init(adev);
+ flags = amdgpu_device_get_vbios_flags(adev);
+ optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
+ if (optional && !adev->bios)
+ return 0;
+
ret = amdgpu_atomfirmware_asic_init(adev, true);
return ret;
} else {
+ if (optional && !adev->bios)
+ return 0;
+
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}
@@ -1542,6 +1682,16 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ if (!amdgpu_rebar)
+ return 0;
+
+ /* resizing on Dell G5 SE platforms causes problems with runtime pm */
+ if ((amdgpu_runtime_pm != 0) &&
+ adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
+ adev->pdev->device == 0x731f &&
+ adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
+ return 0;
+
/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
DRM_WARN("System can't access extended configuration space, please check!!\n");
@@ -1602,14 +1752,6 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
return 0;
}
-static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
-{
- if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
- return false;
-
- return true;
-}
-
/*
* GPU helpers function.
*/
@@ -1624,12 +1766,15 @@ static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
*/
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
- uint32_t reg;
+ uint32_t reg, flags;
if (amdgpu_sriov_vf(adev))
return false;
- if (!amdgpu_device_read_bios(adev))
+ flags = amdgpu_device_get_vbios_flags(adev);
+ if (flags & AMDGPU_VBIOS_SKIP)
+ return false;
+ if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
return false;
if (amdgpu_passthrough(adev)) {
@@ -1643,7 +1788,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
uint32_t fw_ver;
err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
- /* force vPost if error occured */
+ /* force vPost if error occurred */
if (err)
return true;
@@ -1655,7 +1800,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
}
/* Don't post if we need to reset whole hive on init */
- if (adev->gmc.xgmi.pending_reset)
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
return false;
if (adev->has_hw_reset) {
@@ -1730,6 +1875,35 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device
return true;
}
+static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
+{
+#if IS_ENABLED(CONFIG_X86)
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
+ return false;
+
+ if (c->x86 == 6 &&
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
+ switch (c->x86_model) {
+ case VFM_MODEL(INTEL_ALDERLAKE):
+ case VFM_MODEL(INTEL_ALDERLAKE_L):
+ case VFM_MODEL(INTEL_RAPTORLAKE):
+ case VFM_MODEL(INTEL_RAPTORLAKE_P):
+ case VFM_MODEL(INTEL_RAPTORLAKE_S):
+ return true;
+ default:
+ return false;
+ }
+ } else {
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
/**
* amdgpu_device_should_use_aspm - check if the device should program ASPM
*
@@ -1754,7 +1928,7 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
}
if (adev->flags & AMD_IS_APU)
return false;
- if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
+ if (amdgpu_device_aspm_support_quirk(adev))
return false;
return pcie_aspm_enabled(adev->pdev);
}
@@ -1972,8 +2146,31 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
- for (i = 0; i < MAX_XCP; i++)
- adev->enforce_isolation[i] = !!enforce_isolation;
+ for (i = 0; i < MAX_XCP; i++) {
+ switch (amdgpu_enforce_isolation) {
+ case -1:
+ case 0:
+ default:
+ /* disable */
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ /* enable */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ /* enable legacy mode */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ /* enable only process isolation without submitting cleaner shader */
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
+ }
+ }
return 0;
}
@@ -2075,7 +2272,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2109,7 +2306,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
continue;
r = adev->ip_blocks[i].version->funcs->set_powergating_state(
- (void *)adev, state);
+ &adev->ip_blocks[i], state);
if (r)
DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2137,7 +2334,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
- adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
+ adev->ip_blocks[i].version->funcs->get_clockgating_state(
+ &adev->ip_blocks[i], flags);
}
}
@@ -2159,9 +2357,12 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == block_type) {
- r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
- if (r)
- return r;
+ if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
+ r = adev->ip_blocks[i].version->funcs->wait_for_idle(
+ &adev->ip_blocks[i]);
+ if (r)
+ return r;
+ }
break;
}
}
@@ -2170,26 +2371,24 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
/**
- * amdgpu_device_ip_is_idle - is the hardware IP idle
+ * amdgpu_device_ip_is_valid - is the hardware IP enabled
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
- * Check if the hardware IP is idle or not.
- * Returns true if it the IP is idle, false if not.
+ * Check if the hardware IP is enable or not.
+ * Returns true if it the IP is enable, false if not.
*/
-bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
- enum amd_ip_block_type block_type)
+bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
+ enum amd_ip_block_type block_type)
{
int i;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
if (adev->ip_blocks[i].version->type == block_type)
- return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
+ return adev->ip_blocks[i].status.valid;
}
- return true;
+ return false;
}
@@ -2268,8 +2467,10 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
break;
}
- DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
- ip_block_version->funcs->name);
+ dev_info(adev->dev, "detected ip block number %d <%s>\n",
+ adev->num_ip_blocks, ip_block_version->funcs->name);
+
+ adev->ip_blocks[adev->num_ip_blocks].adev = adev;
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -2285,7 +2486,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
* the module parameter virtual_display. This feature provides a virtual
* display hardware on headless boards or in virtualized environments.
* This function parses and validates the configuration string specified by
- * the user and configues the virtual display configuration (number of
+ * the user and configures the virtual display configuration (number of
* virtual connectors, crtcs, etc.) specified.
*/
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
@@ -2348,7 +2549,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Parses the asic configuration parameters specified in the gpu info
- * firmware and makes them availale to the driver for use in configuring
+ * firmware and makes them available to the driver for use in configuring
* the asic.
* Returns 0 on success, -EINVAL on failure.
*/
@@ -2389,6 +2590,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_gpu_info.bin", chip_name);
if (err) {
dev_err(adev->dev,
@@ -2408,7 +2610,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
/*
- * Should be droped when DAL no longer needs it.
+ * Should be dropped when DAL no longer needs it.
*/
if (adev->asic_type == CHIP_NAVI12)
goto parse_soc_bounding_box;
@@ -2478,8 +2680,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
struct amdgpu_ip_block *ip_block;
struct pci_dev *parent;
+ bool total, skip_bios;
+ uint32_t bios_flags;
int i, r;
- bool total;
amdgpu_device_enable_virtual_display(adev);
@@ -2543,6 +2746,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
break;
}
+ /* Check for IP version 9.4.3 with A0 hardware */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
+ !amdgpu_device_get_rev_id(adev)) {
+ dev_err(adev->dev, "Unsupported A0 hardware\n");
+ return -ENODEV; /* device unsupported - no device error */
+ }
+
if (amdgpu_has_atpx() &&
(amdgpu_is_atpx_hybrid() ||
amdgpu_has_atpx_dgpu_power_cntl()) &&
@@ -2555,7 +2765,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}
-
adev->pm.pp_feature = amdgpu_pp_feature_mask;
if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
@@ -2566,25 +2775,25 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
total = true;
for (i = 0; i < adev->num_ip_blocks; i++) {
+ ip_block = &adev->ip_blocks[i];
+
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
DRM_WARN("disabled ip block: %d <%s>\n",
i, adev->ip_blocks[i].version->funcs->name);
adev->ip_blocks[i].status.valid = false;
- } else {
- if (adev->ip_blocks[i].version->funcs->early_init) {
- r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
- if (r == -ENOENT) {
- adev->ip_blocks[i].status.valid = false;
- } else if (r) {
- DRM_ERROR("early_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- total = false;
- } else {
- adev->ip_blocks[i].status.valid = true;
- }
+ } else if (ip_block->version->funcs->early_init) {
+ r = ip_block->version->funcs->early_init(ip_block);
+ if (r == -ENOENT) {
+ adev->ip_blocks[i].status.valid = false;
+ } else if (r) {
+ DRM_ERROR("early_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ total = false;
} else {
adev->ip_blocks[i].status.valid = true;
}
+ } else {
+ adev->ip_blocks[i].status.valid = true;
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
@@ -2592,16 +2801,31 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
+ bios_flags = amdgpu_device_get_vbios_flags(adev);
+ skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
/* Read BIOS */
- if (amdgpu_device_read_bios(adev)) {
- if (!amdgpu_get_bios(adev))
+ if (!skip_bios) {
+ bool optional =
+ !!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
+ if (!amdgpu_get_bios(adev) && !optional)
return -EINVAL;
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- return r;
+ if (optional && !adev->bios)
+ dev_info(
+ adev->dev,
+ "VBIOS image optional, proceeding without VBIOS image");
+
+ if (adev->bios) {
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev,
+ "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(
+ adev,
+ AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
+ 0, 0);
+ return r;
+ }
}
}
@@ -2614,6 +2838,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (!total)
return -ENODEV;
+ if (adev->gmc.xgmi.supported)
+ amdgpu_xgmi_early_init(adev);
+
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
if (ip_block->status.valid != false)
amdgpu_amdkfd_device_probe(adev);
@@ -2633,10 +2860,13 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2658,7 +2888,10 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -2681,6 +2914,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
continue;
+ if (!amdgpu_ip_member_of_hwini(adev,
+ AMD_IP_BLOCK_TYPE_PSP))
+ break;
+
if (!adev->ip_blocks[i].status.sw)
continue;
@@ -2689,22 +2926,18 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
break;
if (amdgpu_in_reset(adev) || adev->in_suspend) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
} else {
- r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("hw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
+ adev->ip_blocks[i].status.hw = true;
}
-
- adev->ip_blocks[i].status.hw = true;
break;
}
}
@@ -2717,6 +2950,12 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
+ struct drm_sched_init_args args = {
+ .ops = &amdgpu_sched_ops,
+ .num_rqs = DRM_SCHED_PRIORITY_COUNT,
+ .timeout_wq = adev->reset_domain->wq,
+ .dev = adev->dev,
+ };
long timeout;
int r, i;
@@ -2742,12 +2981,12 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
break;
}
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
- DRM_SCHED_PRIORITY_COUNT,
- ring->num_hw_submission, 0,
- timeout, adev->reset_domain->wq,
- ring->sched_score, ring->name,
- adev->dev);
+ args.timeout = timeout;
+ args.credit_limit = ring->num_hw_submission;
+ args.score = ring->sched_score;
+ args.name = ring->name;
+
+ r = drm_sched_init(&ring->sched, &args);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
@@ -2786,6 +3025,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
*/
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
+ bool init_badpage;
int i, r;
r = amdgpu_ras_init(adev);
@@ -2795,17 +3035,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
- r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
- if (r) {
- DRM_ERROR("sw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- goto init_failed;
+ if (adev->ip_blocks[i].version->funcs->sw_init) {
+ r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
+ if (r) {
+ DRM_ERROR("sw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ goto init_failed;
+ }
}
adev->ip_blocks[i].status.sw = true;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
+ continue;
+
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
/* need to do common hw init early so everything is set up for gmc */
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("hw_init %d failed %d\n", i, r);
goto init_failed;
@@ -2822,7 +3068,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
goto init_failed;
}
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("hw_init %d failed %d\n", i, r);
goto init_failed;
@@ -2895,7 +3141,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
* Note: theoretically, this should be called before all vram allocations
* to protect retired page from abusing
*/
- r = amdgpu_ras_recovery_init(adev);
+ init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
+ r = amdgpu_ras_recovery_init(adev, init_badpage);
if (r)
goto init_failed;
@@ -2935,13 +3182,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
/* Don't init kfd if whole hive need to be reset during init */
- if (!adev->gmc.xgmi.pending_reset) {
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
}
amdgpu_fru_get_product_info(adev);
+ if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
+ r = amdgpu_cper_init(adev);
+
init_failed:
return r;
@@ -2954,7 +3204,7 @@ init_failed:
*
* Writes a reset magic value to the gart pointer in VRAM. The driver calls
* this function before a GPU reset. If the value is retained after a
- * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents.
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
*/
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
@@ -2985,6 +3235,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
* always assumed to be lost.
*/
switch (amdgpu_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_LINK:
case AMD_RESET_METHOD_BACO:
case AMD_RESET_METHOD_MODE1:
return true;
@@ -3030,7 +3281,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
state);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
@@ -3067,7 +3318,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
- r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
state);
if (r) {
DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
@@ -3135,7 +3386,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
- r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
if (r) {
DRM_ERROR("late_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -3151,7 +3402,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return r;
}
- if (!amdgpu_in_reset(adev))
+ if (!amdgpu_reset_in_recovery(adev))
amdgpu_ras_set_error_query_ready(adev, true);
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
@@ -3206,6 +3457,25 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return 0;
}
+static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ int r;
+
+ if (!ip_block->version->funcs->hw_fini) {
+ DRM_ERROR("hw_fini of IP block <%s> not defined\n",
+ ip_block->version->funcs->name);
+ } else {
+ r = ip_block->version->funcs->hw_fini(ip_block);
+ /* XXX handle errors */
+ if (r) {
+ DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
+ ip_block->version->funcs->name, r);
+ }
+ }
+
+ ip_block->status.hw = false;
+}
+
/**
* amdgpu_device_smu_fini_early - smu hw_fini wrapper
*
@@ -3215,7 +3485,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
*/
static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
{
- int i, r;
+ int i;
if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
return;
@@ -3224,13 +3494,7 @@ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
break;
}
}
@@ -3244,7 +3508,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].version->funcs->early_fini)
continue;
- r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
if (r) {
DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -3255,22 +3519,16 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
amdgpu_amdkfd_suspend(adev, false);
+ amdgpu_userq_suspend(adev);
- /* Workaroud for ASICs need to disable SMC first */
+ /* Workaround for ASICs need to disable SMC first */
amdgpu_device_smu_fini_early(adev);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.hw)
continue;
- r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
-
- adev->ip_blocks[i].status.hw = false;
+ amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]);
}
if (amdgpu_sriov_vf(adev)) {
@@ -3296,6 +3554,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_cper_fini(adev);
+
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
amdgpu_virt_release_ras_err_handler_data(adev);
@@ -3315,13 +3575,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
amdgpu_seq64_fini(adev);
+ amdgpu_doorbell_fini(adev);
}
-
- r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
- /* XXX handle errors */
- if (r) {
- DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ if (adev->ip_blocks[i].version->funcs->sw_fini) {
+ r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
+ /* XXX handle errors */
+ if (r) {
+ DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ }
}
adev->ip_blocks[i].status.sw = false;
adev->ip_blocks[i].status.valid = false;
@@ -3331,7 +3593,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.late_initialized)
continue;
if (adev->ip_blocks[i].version->funcs->late_fini)
- adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
+ adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.late_initialized = false;
}
@@ -3364,7 +3626,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
WARN_ON_ONCE(adev->gfx.gfx_off_state);
WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
}
@@ -3403,15 +3665,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
continue;
/* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -3449,15 +3705,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
}
/* skip unnecessary suspend if we do not initialize them yet */
- if (adev->gmc.xgmi.pending_reset &&
- !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
- adev->ip_blocks[i].status.hw = false;
+ if (!amdgpu_ip_member_of_hwini(
+ adev, adev->ip_blocks[i].version->type))
continue;
- }
+ /* Since we skip suspend for S0i3, we need to cancel the delayed
+ * idle work here as the suspend callback never gets called.
+ */
+ if (adev->in_s0ix &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
/* skip suspend of gfx/mes and psp for S0ix
* gfx is in gfxoff state, so on resume it will exit gfxoff just
* like at runtime. PSP is also part of the always on hardware
@@ -3490,13 +3748,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
continue;
/* XXX handle errors */
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
- /* XXX handle errors */
- if (r) {
- DRM_ERROR("suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- }
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
adev->ip_blocks[i].status.hw = false;
+
/* handle putting the SMC in the appropriate state */
if (!amdgpu_sriov_vf(adev)) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
@@ -3570,10 +3824,12 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
!block->status.valid)
continue;
- r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
+ r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-early: %s failed\n",
+ block->version->funcs->name);
return r;
+ }
block->status.hw = true;
}
}
@@ -3583,7 +3839,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
{
- int i, r;
+ struct amdgpu_ip_block *block;
+ int i, r = 0;
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
@@ -3598,30 +3855,28 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
};
for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
- int j;
- struct amdgpu_ip_block *block;
-
- for (j = 0; j < adev->num_ip_blocks; j++) {
- block = &adev->ip_blocks[j];
+ block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]);
- if (block->version->type != ip_order[i] ||
- !block->status.valid ||
- block->status.hw)
- continue;
+ if (!block)
+ continue;
- if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
- r = block->version->funcs->resume(adev);
- else
- r = block->version->funcs->hw_init(adev);
+ if (block->status.valid && !block->status.hw) {
+ if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ r = amdgpu_ip_block_resume(block);
+ } else {
+ r = block->version->funcs->hw_init(block);
+ }
- DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
- if (r)
- return r;
+ if (r) {
+ dev_err(adev->dev, "RE-INIT-late: %s failed\n",
+ block->version->funcs->name);
+ break;
+ }
block->status.hw = true;
}
}
- return 0;
+ return r;
}
/**
@@ -3648,13 +3903,9 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -3666,7 +3917,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * First resume function for hardware IPs. The list of all the hardware
+ * Second resume function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked and the resume callbacks are run for
* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
* functional state after a suspend and updates the software state as
@@ -3684,15 +3935,42 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- DRM_ERROR("resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Third resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all DCE. resume puts the hardware into a functional state after a suspend
+ * and updates the software state as necessary. This function is also used
+ * for restoring the GPU after a GPU reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
+ return r;
}
- adev->ip_blocks[i].status.hw = true;
}
return 0;
@@ -3727,6 +4005,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring->sched.ready)
amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ if (r)
+ return r;
+
+ amdgpu_fence_driver_hw_init(adev);
+
+ r = amdgpu_device_ip_resume_phase3(adev);
+
return r;
}
@@ -3978,11 +4263,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
}
#endif
-static const struct attribute *amdgpu_dev_attributes[] = {
- &dev_attr_pcie_replay_count.attr,
- NULL
-};
-
static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
{
if (amdgpu_mcbp == 1)
@@ -4079,7 +4359,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
- mutex_init(&adev->virt.rlcg_reg_lock);
hash_init(adev->mn_hash);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
@@ -4088,7 +4367,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.reset_sem_mutex);
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
- mutex_init(&adev->gfx.kfd_sch_mutex);
+ for (i = 0; i < MAX_XCP; ++i) {
+ adev->isolation[i].spearhead = dma_fence_get_stub();
+ amdgpu_sync_create(&adev->isolation[i].active);
+ amdgpu_sync_create(&adev->isolation[i].prev);
+ }
+ mutex_init(&adev->gfx.userq_sch_mutex);
+ mutex_init(&adev->gfx.workload_profile_mutex);
+ mutex_init(&adev->vcn.workload_profile_mutex);
+ mutex_init(&adev->userq_mutex);
amdgpu_device_init_apu_flags(adev);
@@ -4105,14 +4392,19 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->virt.rlcg_reg_lock);
spin_lock_init(&adev->wb.lock);
+ xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ);
+
INIT_LIST_HEAD(&adev->reset_list);
INIT_LIST_HEAD(&adev->ras_list);
INIT_LIST_HEAD(&adev->pm.od_kobj_list);
+ INIT_LIST_HEAD(&adev->userq_mgr_list);
+
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
@@ -4149,6 +4441,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* for throttling interrupt) = 60 seconds.
*/
ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
+
ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
/* Registers mapping */
@@ -4173,7 +4466,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/*
* Reset domain needs to be present early, before XGMI hive discovered
- * (if any) and intitialized to use reset sem and in_gpu reset flag
+ * (if any) and initialized to use reset sem and in_gpu reset flag
* early on during init and before calling to RREG32.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
@@ -4181,7 +4474,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return -ENOMEM;
/* detect hw virtualization here */
- amdgpu_detect_virtualization(adev);
+ amdgpu_virt_init(adev);
amdgpu_device_get_pcie_info(adev);
@@ -4193,15 +4486,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_device_set_mcbp(adev);
+ /*
+ * By default, use default mode where all blocks are expected to be
+ * initialized. At present a 'swinit' of blocks is required to be
+ * completed before the need for a different level is detected.
+ */
+ amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
return r;
- /* Get rid of things like offb */
- r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
- if (r)
- return r;
+ /*
+ * No need to remove conflicting FBs for non-display class devices.
+ * This prevents the sysfb from being freed accidently.
+ */
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ /* Get rid of things like offb */
+ r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+ if (r)
+ return r;
+ }
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
@@ -4265,20 +4571,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
if (adev->gmc.xgmi.num_physical_nodes) {
dev_info(adev->dev, "Pending hive reset.\n");
- adev->gmc.xgmi.pending_reset = true;
- /* Only need to init necessary block for SMU to handle the reset */
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
- continue;
- if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
- DRM_DEBUG("IP %s disabled for hw_init.\n",
- adev->ip_blocks[i].version->funcs->name);
- adev->ip_blocks[i].status.hw = true;
- }
- }
+ amdgpu_set_init_level(adev,
+ AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
!amdgpu_device_has_display_hardware(adev)) {
r = psp_gpu_reset(adev);
@@ -4331,8 +4625,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
/* init i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_atombios_i2c_init(adev);
+ amdgpu_i2c_init(adev);
}
}
@@ -4386,7 +4679,7 @@ fence_driver_init:
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
- if (!adev->gmc.xgmi.pending_reset) {
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
r = amdgpu_device_ip_late_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
@@ -4425,7 +4718,7 @@ fence_driver_init:
} else
adev->ucode_sysfs_en = true;
- r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ r = amdgpu_device_attr_sysfs_init(adev);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
@@ -4436,6 +4729,7 @@ fence_driver_init:
amdgpu_fru_sysfs_init(adev);
amdgpu_reg_state_sysfs_init(adev);
+ amdgpu_xcp_sysfs_init(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
@@ -4463,12 +4757,16 @@ fence_driver_init:
if (px)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
- if (adev->gmc.xgmi.pending_reset)
- queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
+ if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
+ amdgpu_xgmi_reset_on_init(adev);
amdgpu_device_check_iommu_direct_map(adev);
+ adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
+ r = register_pm_notifier(&adev->pm_nb);
+ if (r)
+ goto failed;
+
return 0;
release_ras_con:
@@ -4533,6 +4831,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
drain_workqueue(adev->mman.bdev.wq);
adev->shutdown = true;
+ unregister_pm_notifier(&adev->pm_nb);
+
/* make sure IB test finished before entering exclusive mode
* to avoid preemption on IB test
*/
@@ -4555,10 +4855,11 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_pm_sysfs_fini(adev);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
- sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+ amdgpu_device_attr_sysfs_fini(adev);
amdgpu_fru_sysfs_fini(adev);
amdgpu_reg_state_sysfs_fini(adev);
+ amdgpu_xcp_sysfs_fini(adev);
/* disable ras feature must before hw fini */
amdgpu_ras_pre_fini(adev);
@@ -4581,30 +4882,37 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
- int idx;
+ int i, idx;
bool px;
- amdgpu_fence_driver_sw_fini(adev);
amdgpu_device_ip_fini(adev);
+ amdgpu_fence_driver_sw_fini(adev);
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+ for (i = 0; i < MAX_XCP; ++i) {
+ dma_fence_put(adev->isolation[i].spearhead);
+ amdgpu_sync_free(&adev->isolation[i].active);
+ amdgpu_sync_free(&adev->isolation[i].prev);
+ }
amdgpu_reset_fini(adev);
/* free i2c buses */
- if (!amdgpu_device_has_dc_support(adev))
- amdgpu_i2c_fini(adev);
+ amdgpu_i2c_fini(adev);
- if (amdgpu_emu_mode != 1)
- amdgpu_atombios_fini(adev);
-
- kfree(adev->bios);
- adev->bios = NULL;
+ if (adev->bios) {
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+ amdgpu_bios_release(adev);
+ }
kfree(adev->fru_info);
adev->fru_info = NULL;
+ kfree(adev->xcp_mgr);
+ adev->xcp_mgr = NULL;
+
px = amdgpu_device_supports_px(adev_to_drm(adev));
if (px || (!dev_is_removable(&adev->pdev->dev) &&
@@ -4621,7 +4929,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
- amdgpu_doorbell_fini(adev);
drm_dev_exit(idx);
}
@@ -4650,8 +4957,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
{
int ret;
- /* No need to evict vram on APUs for suspend to ram or s2idle */
- if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
+ /* No need to evict vram on APUs unless going to S4 */
+ if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
return 0;
ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
@@ -4664,6 +4971,33 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
* Suspend & resume.
*/
/**
+ * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
+ * @nb: notifier block
+ * @mode: suspend mode
+ * @data: data
+ *
+ * This function is called when the system is about to suspend or hibernate.
+ * It is used to set the appropriate flags so that eviction can be optimized
+ * in the pm prepare callback.
+ */
+static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
+ void *data)
+{
+ struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb);
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ adev->in_s4 = true;
+ break;
+ case PM_POST_HIBERNATION:
+ adev->in_s4 = false;
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
* amdgpu_device_prepare - prepare for device suspend
*
* @dev: drm dev pointer
@@ -4677,15 +5011,13 @@ int amdgpu_device_prepare(struct drm_device *dev)
struct amdgpu_device *adev = drm_to_adev(dev);
int i, r;
- amdgpu_choose_low_power_state(adev);
-
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
/* Evict the majority of BOs before starting suspend sequence */
r = amdgpu_device_evict_resources(adev);
if (r)
- goto unprepare;
+ return r;
flush_delayed_work(&adev->gfx.gfx_off_delay_work);
@@ -4694,30 +5026,25 @@ int amdgpu_device_prepare(struct drm_device *dev)
continue;
if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
continue;
- r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
+ r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
if (r)
- goto unprepare;
+ return r;
}
return 0;
-
-unprepare:
- adev->in_s0ix = adev->in_s3 = false;
-
- return r;
}
/**
* amdgpu_device_suspend - initiate device suspend
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of suspend
+ * @notify_clients: notify in-kernel DRM clients
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver suspend.
*/
-int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
@@ -4737,8 +5064,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");
- if (fbcon)
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+ if (notify_clients)
+ drm_client_dev_suspend(adev_to_drm(adev), false);
cancel_delayed_work_sync(&adev->delayed_init_work);
@@ -4746,8 +5073,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_device_ip_suspend_phase1(adev);
- if (!adev->in_s0ix)
+ if (!adev->in_s0ix) {
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+ amdgpu_userq_suspend(adev);
+ }
r = amdgpu_device_evict_resources(adev);
if (r)
@@ -4773,13 +5102,13 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
* amdgpu_device_resume - initiate device resume
*
* @dev: drm dev pointer
- * @fbcon : notify the fbdev of resume
+ * @notify_clients: notify in-kernel DRM clients
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
* Called at driver resume.
*/
-int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
@@ -4809,12 +5138,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
goto exit;
}
- amdgpu_fence_driver_hw_init(adev);
if (!adev->in_s0ix) {
r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
if (r)
goto exit;
+
+ r = amdgpu_userq_resume(adev);
+ if (r)
+ goto exit;
}
r = amdgpu_device_ip_late_init(adev);
@@ -4835,8 +5167,8 @@ exit:
/* Make sure IB tests flushed */
flush_delayed_work(&adev->delayed_init_work);
- if (fbcon)
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
+ if (notify_clients)
+ drm_client_dev_resume(adev_to_drm(adev), false);
amdgpu_ras_resume(adev);
@@ -4863,9 +5195,6 @@ exit:
}
adev->in_suspend = false;
- if (adev->enable_mes)
- amdgpu_mes_self_test(adev);
-
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
DRM_WARN("smart shift update failed\n");
@@ -4898,7 +5227,8 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->funcs->check_soft_reset)
adev->ip_blocks[i].status.hang =
- adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
+ adev->ip_blocks[i].version->funcs->check_soft_reset(
+ &adev->ip_blocks[i]);
if (adev->ip_blocks[i].status.hang) {
dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
asic_hang = true;
@@ -4927,7 +5257,7 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->pre_soft_reset) {
- r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -4989,7 +5319,7 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->soft_reset) {
- r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -5018,7 +5348,7 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].status.hang &&
adev->ip_blocks[i].version->funcs->post_soft_reset)
- r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
+ r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
if (r)
return r;
}
@@ -5053,7 +5383,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_ras_set_fed(adev, false);
+ amdgpu_ras_clear_err_state(adev);
amdgpu_irq_gpu_reset_resume_helper(adev);
/* some sw clean up VF needs to do before recover */
@@ -5101,22 +5431,28 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
+
+ amdgpu_virt_ras_telemetry_post_reset(adev);
+
return 0;
}
/**
- * amdgpu_device_has_job_running - check if there is any job in mirror list
+ * amdgpu_device_has_job_running - check if there is any unfinished job
*
* @adev: amdgpu_device pointer
*
- * check if there is any job in mirror list
+ * check if there is any job running on the device when guest driver receives
+ * FLR notification from host driver. If there are still jobs running, then
+ * the guest driver will not respond the FLR reset. Instead, let the job hit
+ * the timeout and guest driver then issue the reset request.
*/
bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
{
int i;
- struct drm_sched_job *job;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -5124,11 +5460,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
if (!amdgpu_ring_sched_ready(ring))
continue;
- spin_lock(&ring->sched.job_list_lock);
- job = list_first_entry_or_null(&ring->sched.pending_list,
- struct drm_sched_job, list);
- spin_unlock(&ring->sched.job_list_lock);
- if (job)
+ if (amdgpu_fence_count_emitted(ring))
return true;
}
return false;
@@ -5190,7 +5522,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
u32 i;
int ret = 0;
- amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
dev_info(adev->dev, "GPU mode1 reset\n");
@@ -5232,7 +5565,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
goto mode1_reset_failed;
}
- amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+ if (adev->bios)
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
return 0;
@@ -5241,6 +5575,29 @@ mode1_reset_failed:
return ret;
}
+int amdgpu_device_link_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ dev_info(adev->dev, "GPU link reset\n");
+
+ if (!adev->pcie_reset_ctx.occurs_dpc)
+ ret = amdgpu_dpm_link_reset(adev);
+
+ if (ret)
+ goto link_reset_failed;
+
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto link_reset_failed;
+
+ return 0;
+
+link_reset_failed:
+ dev_err(adev->dev, "GPU link reset failed\n");
+ return ret;
+}
+
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
@@ -5309,7 +5666,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < tmp_adev->num_ip_blocks; i++)
if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
tmp_adev->ip_blocks[i].version->funcs
- ->dump_ip_state((void *)tmp_adev);
+ ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
}
@@ -5325,76 +5682,35 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r;
}
-int amdgpu_do_asic_reset(struct list_head *device_list_handle,
- struct amdgpu_reset_context *reset_context)
+int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
{
- struct amdgpu_device *tmp_adev = NULL;
- bool need_full_reset, skip_hw_reset, vram_lost = false;
- int r = 0;
+ struct list_head *device_list_handle;
+ bool full_reset, vram_lost = false;
+ struct amdgpu_device *tmp_adev;
+ int r, init_level;
- /* Try reset handler method first */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
+ device_list_handle = reset_context->reset_device_list;
- reset_context->reset_device_list = device_list_handle;
- r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
- /* If reset handler not implemented, continue; otherwise return */
- if (r == -EOPNOTSUPP)
- r = 0;
- else
- return r;
+ if (!device_list_handle)
+ return -EINVAL;
- /* Reset handler not implemented, use the default method */
- need_full_reset =
- test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+ full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
- /*
- * ASIC reset has to be done on all XGMI hive nodes ASAP
- * to allow proper links negotiation in FW (within 1 sec)
+ /**
+ * If it's reset on init, it's default init level, otherwise keep level
+ * as recovery level.
*/
- if (!skip_hw_reset && need_full_reset) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- /* For XGMI run all resets in parallel to speed up the process */
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- tmp_adev->gmc.xgmi.pending_reset = false;
- if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
- r = -EALREADY;
- } else
- r = amdgpu_asic_reset(tmp_adev);
-
- if (r) {
- dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
- r, adev_to_drm(tmp_adev)->unique);
- goto out;
- }
- }
-
- /* For XGMI wait for all resets to complete before proceed */
- if (!r) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
- flush_work(&tmp_adev->xgmi_reset_work);
- r = tmp_adev->asic_reset_res;
- if (r)
- break;
- }
- }
- }
- }
-
- if (!r && amdgpu_ras_intr_triggered()) {
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
- }
-
- amdgpu_ras_intr_cleared();
- }
+ if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+ init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+ else
+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+ r = 0;
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- if (need_full_reset) {
+ amdgpu_set_init_level(tmp_adev, init_level);
+ if (full_reset) {
/* post card */
- amdgpu_ras_set_fed(tmp_adev, false);
+ amdgpu_ras_clear_err_state(tmp_adev);
r = amdgpu_device_asic_init(tmp_adev);
if (r) {
dev_warn(tmp_adev->dev, "asic atom init failed!");
@@ -5431,6 +5747,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+ r = amdgpu_device_ip_resume_phase3(tmp_adev);
+ if (r)
+ goto out;
+
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
@@ -5448,7 +5768,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (r)
goto out;
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
+ drm_client_dev_resume(adev_to_drm(tmp_adev), false);
/*
* The GPU enters bad state once faulty pages
@@ -5478,11 +5798,13 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
out:
if (!r) {
+ /* IP init is complete now, set level as default */
+ amdgpu_set_init_level(tmp_adev,
+ AMDGPU_INIT_LEVEL_DEFAULT);
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
- need_full_reset = true;
r = -EAGAIN;
goto end;
}
@@ -5493,10 +5815,85 @@ out:
}
end:
- if (need_full_reset)
+ return r;
+}
+
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ bool need_full_reset, skip_hw_reset;
+ int r = 0;
+
+ /* Try reset handler method first */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+
+ reset_context->reset_device_list = device_list_handle;
+ r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+ /* If reset handler not implemented, continue; otherwise return */
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ else
+ return r;
+
+ /* Reset handler not implemented, use the default method */
+ need_full_reset =
+ test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+ skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
+ /*
+ * ASIC reset has to be done on all XGMI hive nodes ASAP
+ * to allow proper links negotiation in FW (within 1 sec)
+ */
+ if (!skip_hw_reset && need_full_reset) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (!queue_work(system_unbound_wq,
+ &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ } else
+ r = amdgpu_asic_reset(tmp_adev);
+
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "ASIC reset failed with error, %d for drm dev, %s",
+ r, adev_to_drm(tmp_adev)->unique);
+ goto out;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, device_list_handle,
+ reset_list) {
+ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+ }
+ }
+
+ if (!r && amdgpu_ras_intr_triggered()) {
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ amdgpu_ras_reset_error_count(tmp_adev,
+ AMDGPU_RAS_BLOCK__MMHUB);
+ }
+
+ amdgpu_ras_intr_cleared();
+ }
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r == -EAGAIN)
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
else
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+
+out:
return r;
}
@@ -5505,6 +5902,7 @@ static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
+ case AMD_RESET_METHOD_LINK:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
break;
case AMD_RESET_METHOD_MODE2:
@@ -5621,92 +6019,76 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle)
return ret;
}
-/**
- * amdgpu_device_gpu_recover - reset the asic and recover scheduler
- *
- * @adev: amdgpu_device pointer
- * @job: which job trigger hang
- * @reset_context: amdgpu reset context pointer
- *
- * Attempt to reset the GPU if it has hung (all asics).
- * Attempt to do soft-reset or full-reset and reinitialize Asic
- * Returns 0 for success or an error on failure.
- */
-
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job,
- struct amdgpu_reset_context *reset_context)
+static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_hive_info *hive)
{
- struct list_head device_list, *device_list_handle = NULL;
- bool job_signaled = false;
- struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
- int i, r = 0;
- bool need_emergency_restart = false;
- bool audio_suspended = false;
- int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
-
- /*
- * Special case: RAS triggered and full reset isn't supported
- */
- need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
-
- /*
- * Flush RAM to disk so that after reboot
- * the user can read log and see why the system rebooted.
- */
- if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
- amdgpu_ras_get_context(adev)->reboot) {
- DRM_WARN("Emergency reboot.");
-
- ksys_sync_helper();
- emergency_restart();
- }
-
- dev_info(adev->dev, "GPU %s begin!\n",
- need_emergency_restart ? "jobs stop":"reset");
-
- if (!amdgpu_sriov_vf(adev))
- hive = amdgpu_get_xgmi_hive(adev);
- if (hive)
- mutex_lock(&hive->hive_lock);
+ int r;
- reset_context->job = job;
- reset_context->hive = hive;
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
* to put adev in the 1st position.
*/
- INIT_LIST_HEAD(&device_list);
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- list_add_tail(&tmp_adev->reset_list, &device_list);
+ list_add_tail(&tmp_adev->reset_list, device_list);
if (adev->shutdown)
tmp_adev->shutdown = true;
+ if (adev->pcie_reset_ctx.occurs_dpc)
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
}
- if (!list_is_first(&adev->reset_list, &device_list))
- list_rotate_to_front(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
+ if (!list_is_first(&adev->reset_list, device_list))
+ list_rotate_to_front(&adev->reset_list, device_list);
} else {
- list_add_tail(&adev->reset_list, &device_list);
- device_list_handle = &device_list;
+ list_add_tail(&adev->reset_list, device_list);
}
- if (!amdgpu_sriov_vf(adev)) {
- r = amdgpu_device_health_check(device_list_handle);
+ if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) {
+ r = amdgpu_device_health_check(device_list);
if (r)
- goto end_reset;
+ return r;
}
- /* We need to lock reset domain only once both for XGMI and single device */
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
+ return 0;
+}
+
+static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+}
- /* block all schedulers and reset given job's ring */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev,
+ struct list_head *device_list)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ if (list_empty(device_list))
+ return;
+ tmp_adev =
+ list_first_entry(device_list, struct amdgpu_device, reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+}
+
+static int amdgpu_device_halt_activities(
+ struct amdgpu_device *adev, struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context,
+ struct list_head *device_list, struct amdgpu_hive_info *hive,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
amdgpu_device_set_mp1_state(tmp_adev);
/*
@@ -5720,7 +6102,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* some audio codec errors.
*/
if (!amdgpu_device_suspend_display_audio(tmp_adev))
- audio_suspended = true;
+ tmp_adev->pcie_reset_ctx.audio_suspended = true;
amdgpu_ras_set_error_query_ready(tmp_adev, false);
@@ -5729,15 +6111,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
- * Mark these ASICs to be reseted as untracked first
+ * Mark these ASICs to be reset as untracked first
* And add them back after reset completed
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
+ drm_client_dev_suspend(adev_to_drm(tmp_adev), false);
/* disable ras on ALL IPs */
if (!need_emergency_restart &&
+ (!adev->pcie_reset_ctx.occurs_dpc) &&
amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
@@ -5755,24 +6138,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
atomic_inc(&tmp_adev->gpu_reset_counter);
}
- if (need_emergency_restart)
- goto skip_sched_resume;
+ return r;
+}
- /*
- * Must check guilty signal here since after this point all old
- * HW fences are force signaled.
- *
- * job->base holds a reference to parent fence
- */
- if (job && dma_fence_is_signaled(&job->hw_fence)) {
- job_signaled = true;
- dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
- goto skip_hw_reset;
- }
+static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
+ int r = 0;
retry: /* Rest of adevs pre asic reset from XGMI hive. */
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
+ if (adev->pcie_reset_ctx.occurs_dpc)
+ tmp_adev->no_hw_access = true;
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
+ if (adev->pcie_reset_ctx.occurs_dpc)
+ tmp_adev->no_hw_access = false;
/*TODO Should we stop ?*/
if (r) {
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -5784,6 +6167,11 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
/* Actual ASIC resets if needed.*/
/* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
+
+ /* Bail out of reset early */
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
amdgpu_ras_set_fed(adev, true);
@@ -5798,12 +6186,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
if (r)
adev->asic_reset_res = r;
} else {
- r = amdgpu_do_asic_reset(device_list_handle, reset_context);
+ r = amdgpu_do_asic_reset(device_list, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
/*
* Drop any pending non scheduler resets queued before reset is done.
* Any reset scheduled after this point would be valid. Scheduler resets
@@ -5813,10 +6201,18 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
amdgpu_device_stop_pending_resets(tmp_adev);
}
-skip_hw_reset:
+ return r;
+}
+
+static int amdgpu_device_sched_resume(struct list_head *device_list,
+ struct amdgpu_reset_context *reset_context,
+ bool job_signaled)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+ int i, r = 0;
/* Post ASIC reset for all devs .*/
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -5824,7 +6220,7 @@ skip_hw_reset:
if (!amdgpu_ring_sched_ready(ring))
continue;
- drm_sched_start(&ring->sched);
+ drm_sched_start(&ring->sched, 0);
}
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
@@ -5852,8 +6248,16 @@ skip_hw_reset:
}
}
-skip_sched_resume:
- list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ return r;
+}
+
+static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
+ struct list_head *device_list,
+ bool need_emergency_restart)
+{
+ struct amdgpu_device *tmp_adev = NULL;
+
+ list_for_each_entry(tmp_adev, device_list, reset_list) {
/* unlock kfd: SRIOV would do it separately */
if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_post_reset(tmp_adev);
@@ -5864,18 +6268,117 @@ skip_sched_resume:
if (!adev->kfd.init_complete)
amdgpu_amdkfd_device_init(adev);
- if (audio_suspended)
+ if (tmp_adev->pcie_reset_ctx.audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unset_mp1_state(tmp_adev);
amdgpu_ras_set_error_query_ready(tmp_adev, true);
+
}
+}
- tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
- reset_list);
- amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+/**
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ *
+ * @adev: amdgpu_device pointer
+ * @job: which job trigger hang
+ * @reset_context: amdgpu reset context pointer
+ *
+ * Attempt to reset the GPU if it has hung (all asics).
+ * Attempt to do soft-reset or full-reset and reinitialize Asic
+ * Returns 0 for success or an error on failure.
+ */
+
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head device_list;
+ bool job_signaled = false;
+ struct amdgpu_hive_info *hive = NULL;
+ int r = 0;
+ bool need_emergency_restart = false;
+
+ /*
+ * If it reaches here because of hang/timeout and a RAS error is
+ * detected at the same time, let RAS recovery take care of it.
+ */
+ if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) &&
+ !amdgpu_sriov_vf(adev) &&
+ reset_context->src != AMDGPU_RESET_SRC_RAS) {
+ dev_dbg(adev->dev,
+ "Gpu recovery from source: %d yielding to RAS error recovery handling",
+ reset_context->src);
+ return 0;
+ }
+
+ /*
+ * Special case: RAS triggered and full reset isn't supported
+ */
+ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
+
+ /*
+ * Flush RAM to disk so that after reboot
+ * the user can read log and see why the system rebooted.
+ */
+ if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
+ amdgpu_ras_get_context(adev)->reboot) {
+ DRM_WARN("Emergency reboot.");
+
+ ksys_sync_helper();
+ emergency_restart();
+ }
+
+ dev_info(adev->dev, "GPU %s begin!\n",
+ need_emergency_restart ? "jobs stop":"reset");
+
+ if (!amdgpu_sriov_vf(adev))
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+
+ reset_context->job = job;
+ reset_context->hive = hive;
+ INIT_LIST_HEAD(&device_list);
+
+ if (amdgpu_device_recovery_prepare(adev, &device_list, hive))
+ goto end_reset;
+
+ /* We need to lock reset domain only once both for XGMI and single device */
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+
+ r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list,
+ hive, need_emergency_restart);
+ if (r)
+ goto reset_unlock;
+
+ if (need_emergency_restart)
+ goto skip_sched_resume;
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
+ job_signaled = true;
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+ r = amdgpu_device_asic_reset(adev, &device_list, reset_context);
+ if (r)
+ goto reset_unlock;
+skip_hw_reset:
+ r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled);
+ if (r)
+ goto reset_unlock;
+skip_sched_resume:
+ amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart);
+reset_unlock:
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
end_reset:
if (hive) {
mutex_unlock(&hive->hive_lock);
@@ -5886,6 +6389,10 @@ end_reset:
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
atomic_set(&adev->reset_domain->reset_res, r);
+
+ if (!r)
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE);
+
return r;
}
@@ -5928,19 +6435,56 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
}
/**
+ * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
+ *
+ * @adev: amdgpu_device pointer
+ * @speed: pointer to the speed of the link
+ * @width: pointer to the width of the link
+ *
+ * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
+ * AMD dGPU which may be a virtual upstream bridge.
+ */
+static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
+ enum pci_bus_speed *speed,
+ enum pcie_link_width *width)
+{
+ struct pci_dev *parent = adev->pdev;
+
+ if (!speed || !width)
+ return;
+
+ parent = pci_upstream_bridge(parent);
+ if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ while ((parent = pci_upstream_bridge(parent))) {
+ if (parent->vendor == PCI_VENDOR_ID_ATI) {
+ /* use the upstream/downstream switches internal to dGPU */
+ *speed = pcie_get_speed_cap(parent);
+ *width = pcie_get_width_cap(parent);
+ }
+ }
+ } else {
+ /* use the device itself */
+ *speed = pcie_get_speed_cap(adev->pdev);
+ *width = pcie_get_width_cap(adev->pdev);
+ }
+}
+
+/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
* @adev: amdgpu_device pointer
*
- * Fetchs and stores in the driver the PCIE capabilities (gen speed
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
* and lanes) of the slot the device is in. Handles APUs and
* virtualized environments where PCIE config space may not be available.
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- struct pci_dev *pdev;
enum pci_bus_speed speed_cap, platform_speed_cap;
- enum pcie_link_width platform_link_width;
+ enum pcie_link_width platform_link_width, link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -5962,11 +6506,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
&platform_link_width);
+ amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
- pdev = adev->pdev;
- speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
@@ -6022,51 +6565,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
if (adev->pm.pcie_mlw_mask == 0) {
+ /* asic caps */
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X16:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X12:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X8:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X4:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X2:
+ adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case PCIE_LNK_X1:
+ adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ }
+ /* platform caps */
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (platform_link_width) {
case PCIE_LNK_X32:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X16:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X12:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X8:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X4:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X2:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case PCIE_LNK_X1:
- adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
@@ -6092,6 +6687,9 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
bool p2p_access =
!adev->gmc.xgmi.connected_to_cpu &&
!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
+ if (!p2p_access)
+ dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
+ pci_name(peer_adev->pdev));
bool is_large_bar = adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
@@ -6164,12 +6762,15 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ struct amdgpu_reset_context reset_context;
+ struct list_head device_list;
+ int r = 0;
- DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
+ dev_info(adev->dev, "PCI error: detected callback!!\n");
- if (adev->gmc.xgmi.num_physical_nodes > 1) {
- DRM_WARN("No support for XGMI hive yet...");
+ if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+ dev_warn(adev->dev, "No support for XGMI hive yet...\n");
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -6177,32 +6778,32 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
switch (state) {
case pci_channel_io_normal:
+ dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
return PCI_ERS_RESULT_CAN_RECOVER;
- /* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
- /*
- * Locking adev->reset_domain->sem will prevent any external access
- * to GPU during PCI error recovery
- */
- amdgpu_device_lock_reset_domain(adev->reset_domain);
- amdgpu_device_set_mp1_state(adev);
-
- /*
- * Block any work scheduling as we do for regular GPU reset
- * for the duration of the recovery
- */
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!amdgpu_ring_sched_ready(ring))
- continue;
-
- drm_sched_stop(&ring->sched, NULL);
+ /* Fatal error, prepare for slot reset */
+ dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+
+ if (hive)
+ mutex_lock(&hive->hive_lock);
+ adev->pcie_reset_ctx.occurs_dpc = true;
+ memset(&reset_context, 0, sizeof(reset_context));
+ INIT_LIST_HEAD(&device_list);
+
+ amdgpu_device_recovery_prepare(adev, &device_list, hive);
+ amdgpu_device_recovery_get_reset_lock(adev, &device_list);
+ r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list,
+ hive, false);
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
- atomic_inc(&adev->gpu_reset_counter);
+ if (r)
+ return PCI_ERS_RESULT_DISCONNECT;
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
/* Permanent error, prepare for device removal */
+ dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
return PCI_ERS_RESULT_DISCONNECT;
}
@@ -6215,8 +6816,10 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
*/
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(dev);
- DRM_INFO("PCI error: mmio enabled callback!!\n");
+ dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
/* TODO - dump whatever for debugging purposes */
@@ -6240,10 +6843,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int r, i;
struct amdgpu_reset_context reset_context;
- u32 memsize;
+ struct amdgpu_device *tmp_adev;
+ struct amdgpu_hive_info *hive;
struct list_head device_list;
+ int r = 0, i;
+ u32 memsize;
/* PCI error slot reset should be skipped During RAS recovery */
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
@@ -6251,15 +6856,12 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
amdgpu_ras_in_recovery(adev))
return PCI_ERS_RESULT_RECOVERED;
- DRM_INFO("PCI error: slot reset callback!!\n");
+ dev_info(adev->dev, "PCI error: slot reset callback!!\n");
memset(&reset_context, 0, sizeof(reset_context));
- INIT_LIST_HEAD(&device_list);
- list_add_tail(&adev->reset_list, &device_list);
-
/* wait for asic to come out of reset */
- msleep(500);
+ msleep(700);
/* Restore PCI confspace */
amdgpu_device_load_pci_state(pdev);
@@ -6280,26 +6882,40 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
-
- adev->no_hw_access = true;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- adev->no_hw_access = false;
- if (r)
- goto out;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+ INIT_LIST_HEAD(&device_list);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ reset_context.hive = hive;
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = true;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else {
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ list_add_tail(&adev->reset_list, &device_list);
+ }
+ r = amdgpu_device_asic_reset(adev, &device_list, &reset_context);
out:
if (!r) {
if (amdgpu_device_cache_pci_state(adev->pdev))
pci_restore_state(adev->pdev);
-
- DRM_INFO("PCIe error recovery succeeded\n");
+ dev_info(adev->dev, "PCIe error recovery succeeded\n");
} else {
- DRM_ERROR("PCIe error recovery failed, err:%d", r);
- amdgpu_device_unset_mp1_state(adev);
- amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
+ if (hive) {
+ list_for_each_entry(tmp_adev, &device_list, reset_list)
+ amdgpu_device_unset_mp1_state(tmp_adev);
+ }
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ }
+
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -6316,26 +6932,37 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
struct amdgpu_device *adev = drm_to_adev(dev);
- int i;
-
+ struct list_head device_list;
+ struct amdgpu_hive_info *hive = NULL;
+ struct amdgpu_device *tmp_adev = NULL;
- DRM_INFO("PCI error: resume callback!!\n");
+ dev_info(adev->dev, "PCI error: resume callback!!\n");
/* Only continue execution for the case of pci_channel_io_frozen */
if (adev->pci_channel_state != pci_channel_io_frozen)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
+ INIT_LIST_HEAD(&device_list);
- if (!amdgpu_ring_sched_ready(ring))
- continue;
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ mutex_lock(&hive->hive_lock);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ tmp_adev->pcie_reset_ctx.in_link_reset = false;
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+ }
+ } else
+ list_add_tail(&adev->reset_list, &device_list);
- drm_sched_start(&ring->sched);
- }
+ amdgpu_device_sched_resume(&device_list, NULL, NULL);
+ amdgpu_device_gpu_resume(adev, &device_list, false);
+ amdgpu_device_recovery_put_reset_lock(adev, &device_list);
+ adev->pcie_reset_ctx.occurs_dpc = false;
- amdgpu_device_unset_mp1_state(adev);
- amdgpu_device_unlock_reset_domain(adev->reset_domain);
+ if (hive) {
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+ }
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -6344,6 +6971,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
struct amdgpu_device *adev = drm_to_adev(dev);
int r;
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
r = pci_save_state(pdev);
if (!r) {
kfree(adev->pci_state);
@@ -6520,22 +7150,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
{
struct dma_fence *old = NULL;
+ dma_fence_get(gang);
do {
dma_fence_put(old);
old = amdgpu_device_get_gang(adev);
if (old == gang)
break;
- if (!dma_fence_is_signaled(old))
+ if (!dma_fence_is_signaled(old)) {
+ dma_fence_put(gang);
return old;
+ }
} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
old, gang) != old);
+ /*
+ * Drop it once for the exchanged reference in adev and once for the
+ * thread local reference acquired in amdgpu_device_get_gang().
+ */
+ dma_fence_put(old);
dma_fence_put(old);
return NULL;
}
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+ struct drm_sched_fence *f = job->base.s_fence;
+ struct dma_fence *dep;
+ void *owner;
+ int r;
+
+ /*
+ * For now enforce isolation only for the GFX block since we only need
+ * the cleaner shader on those rings.
+ */
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+ ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return NULL;
+
+ /*
+ * All submissions where enforce isolation is false are handled as if
+ * they come from a single client. Use ~0l as the owner to distinct it
+ * from kernel submissions where the owner is NULL.
+ */
+ owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ /*
+ * The "spearhead" submission is the first one which changes the
+ * ownership to its client. We always need to wait for it to be
+ * pushed to the HW before proceeding with anything.
+ */
+ if (&f->scheduled != isolation->spearhead &&
+ !dma_fence_is_signaled(isolation->spearhead)) {
+ dep = isolation->spearhead;
+ goto out_grab_ref;
+ }
+
+ if (isolation->owner != owner) {
+
+ /*
+ * Wait for any gang to be assembled before switching to a
+ * different owner or otherwise we could deadlock the
+ * submissions.
+ */
+ if (!job->gang_submit) {
+ dep = amdgpu_device_get_gang(adev);
+ if (!dma_fence_is_signaled(dep))
+ goto out_return_dep;
+ dma_fence_put(dep);
+ }
+
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(&f->scheduled);
+ amdgpu_sync_move(&isolation->active, &isolation->prev);
+ trace_amdgpu_isolation(isolation->owner, owner);
+ isolation->owner = owner;
+ }
+
+ /*
+ * Specifying the ring here helps to pipeline submissions even when
+ * isolation is enabled. If that is not desired for testing NULL can be
+ * used instead of the ring to enforce a CPU round trip while switching
+ * between clients.
+ */
+ dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+ r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+ if (r)
+ DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+ dma_fence_get(dep);
+out_return_dep:
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ return dep;
+}
+
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -6604,3 +7329,47 @@ uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
}
return ret;
}
+
+ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
+{
+ ssize_t size = 0;
+
+ if (!ring || !ring->adev)
+ return size;
+
+ if (amdgpu_device_should_recover_gpu(ring->adev))
+ size |= AMDGPU_RESET_TYPE_FULL;
+
+ if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
+ !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
+ size |= AMDGPU_RESET_TYPE_SOFT_RESET;
+
+ return size;
+}
+
+ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
+{
+ ssize_t size = 0;
+
+ if (supported_reset == 0) {
+ size += sysfs_emit_at(buf, size, "unsupported");
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+
+ }
+
+ if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
+ size += sysfs_emit_at(buf, size, "soft ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+ size += sysfs_emit_at(buf, size, "queue ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
+ size += sysfs_emit_at(buf, size, "pipe ");
+
+ if (supported_reset & AMDGPU_RESET_TYPE_FULL)
+ size += sysfs_emit_at(buf, size, "full ");
+
+ size += sysfs_emit_at(buf, size, "\n");
+ return size;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 4bd61c169ca8..81b3443c8d7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -104,15 +104,24 @@
#include "smuio_v13_0_6.h"
#include "smuio_v14_0_2.h"
#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
#include "jpeg_v5_0_0.h"
+#include "jpeg_v5_0_1.h"
#include "amdgpu_vpe.h"
#if defined(CONFIG_DRM_AMD_ISP)
#include "amdgpu_isp.h"
#endif
-#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
-MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
+MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/arcturus_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_ip_discovery.bin");
#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
@@ -261,9 +270,10 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
uint8_t *binary)
{
+ bool sz_valid = true;
uint64_t vram_size;
- u32 msg;
int i, ret = 0;
+ u32 msg;
if (!amdgpu_sriov_vf(adev)) {
/* It can take up to a second for IFWI init to complete on some dGPUs,
@@ -282,9 +292,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
}
}
- vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
+ if (!vram_size || vram_size == U32_MAX)
+ sz_valid = false;
+ else
+ vram_size <<= 20;
- if (vram_size) {
+ if (sz_valid) {
uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
adev->mman.discovery_tmr_size, false);
@@ -292,28 +306,27 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
ret = amdgpu_discovery_read_binary_from_sysmem(adev, binary);
}
+ if (ret)
+ dev_err(adev->dev,
+ "failed to read discovery info from memory, vram size read: %llx",
+ vram_size);
+
return ret;
}
-static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+ uint8_t *binary,
+ const char *fw_name)
{
const struct firmware *fw;
- const char *fw_name;
int r;
- switch (amdgpu_discovery) {
- case 2:
- fw_name = FIRMWARE_IP_DISCOVERY;
- break;
- default:
- dev_warn(adev->dev, "amdgpu_discovery is not set properly\n");
- return -EINVAL;
- }
-
- r = request_firmware(&fw, fw_name, adev->dev);
+ r = firmware_request_nowarn(&fw, fw_name, adev->dev);
if (r) {
- dev_err(adev->dev, "can't load firmware \"%s\"\n",
- fw_name);
+ if (amdgpu_discovery == 2)
+ dev_err(adev->dev, "can't load firmware \"%s\"\n", fw_name);
+ else
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fw_name);
return r;
}
@@ -402,10 +415,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
return 0;
}
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+ if (amdgpu_discovery == 2)
+ return "amdgpu/ip_discovery.bin";
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return "amdgpu/vega10_ip_discovery.bin";
+ case CHIP_VEGA12:
+ return "amdgpu/vega12_ip_discovery.bin";
+ case CHIP_RAVEN:
+ if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+ return "amdgpu/raven2_ip_discovery.bin";
+ else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+ return "amdgpu/picasso_ip_discovery.bin";
+ else
+ return "amdgpu/raven_ip_discovery.bin";
+ case CHIP_VEGA20:
+ return "amdgpu/vega20_ip_discovery.bin";
+ case CHIP_ARCTURUS:
+ return "amdgpu/arcturus_ip_discovery.bin";
+ case CHIP_ALDEBARAN:
+ return "amdgpu/aldebaran_ip_discovery.bin";
+ default:
+ return NULL;
+ }
+}
+
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
+ const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
@@ -417,17 +459,14 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
return -ENOMEM;
/* Read from file if it is the preferred option */
- if (amdgpu_discovery == 2) {
- dev_info(adev->dev, "use ip discovery information from file");
- r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
-
- if (r) {
- dev_err(adev->dev, "failed to read ip discovery binary from file\n");
- r = -EINVAL;
+ fw_name = amdgpu_discovery_get_fw_name(adev);
+ if (fw_name != NULL) {
+ drm_dbg(&adev->ddev, "use ip discovery information from file");
+ r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
+ if (r)
goto out;
- }
-
} else {
+ drm_dbg(&adev->ddev, "use ip discovery information from memory");
r = amdgpu_discovery_read_binary_from_mem(
adev, adev->mman.discovery_bin);
if (r)
@@ -585,16 +624,19 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev)
adev->mman.discovery_bin = NULL;
}
-static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip)
+static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev,
+ uint8_t instance, uint16_t hw_id)
{
- if (ip->instance_number >= HWIP_MAX_INSTANCE) {
- DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n",
- ip->instance_number);
+ if (instance >= HWIP_MAX_INSTANCE) {
+ dev_err(adev->dev,
+ "Unexpected instance_number (%d) from ip discovery blob\n",
+ instance);
return -EINVAL;
}
- if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) {
- DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n",
- le16_to_cpu(ip->hw_id));
+ if (hw_id >= HW_ID_MAX) {
+ dev_err(adev->dev,
+ "Unexpected hw_id (%d) from ip discovery blob\n",
+ hw_id);
return -EINVAL;
}
@@ -607,8 +649,10 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
- struct ip_v4 *ip;
+ struct ip *ip;
uint16_t die_offset, ip_offset, num_dies, num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
int i, j;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
@@ -624,16 +668,18 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
- ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
-
- if (amdgpu_discovery_validate_ip(ip))
+ ip = (struct ip *)(adev->mman.discovery_bin +
+ ip_offset);
+ inst = ip->number_instance;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
goto next_ip;
- if (le16_to_cpu(ip->variant) == 1) {
- switch (le16_to_cpu(ip->hw_id)) {
+ if (ip->harvest == 1) {
+ switch (hw_id) {
case VCN_HWID:
(*vcn_harvest_count)++;
- if (ip->instance_number == 0) {
+ if (inst == 0) {
adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
adev->vcn.inst_mask &=
~AMDGPU_VCN_HARVEST_VCN0;
@@ -655,10 +701,8 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
}
}
next_ip:
- if (ihdr->base_addr_64_bit)
- ip_offset += struct_size(ip, base_address_64, ip->num_base_address);
- else
- ip_offset += struct_size(ip, base_address, ip->num_base_address);
+ ip_offset += struct_size(ip, base_address,
+ ip->num_base_address);
}
}
}
@@ -1017,6 +1061,8 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
bool reg_base_64)
{
int ii, jj, kk, res;
+ uint16_t hw_id;
+ uint8_t inst;
DRM_DEBUG("num_ips:%d", num_ips);
@@ -1032,8 +1078,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
struct ip_hw_instance *ip_hw_instance;
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
- if (amdgpu_discovery_validate_ip(ip) ||
- le16_to_cpu(ip->hw_id) != ii)
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id) ||
+ hw_id != ii)
goto next_ip;
DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset);
@@ -1279,17 +1327,19 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
+ uint32_t wafl_ver;
uint16_t num_ips;
+ uint16_t hw_id;
+ uint8_t inst;
int hw_ip;
int i, j, k;
int r;
r = amdgpu_discovery_init(adev);
- if (r) {
- DRM_ERROR("amdgpu_discovery_init failed\n");
+ if (r)
return r;
- }
+ wafl_ver = 0;
adev->gfx.xcc_mask = 0;
adev->sdma.sdma_mask = 0;
adev->vcn.inst_mask = 0;
@@ -1319,7 +1369,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
for (j = 0; j < num_ips; j++) {
ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset);
- if (amdgpu_discovery_validate_ip(ip))
+ inst = ip->instance_number;
+ hw_id = le16_to_cpu(ip->hw_id);
+ if (amdgpu_discovery_validate_ip(adev, inst, hw_id))
goto next_ip;
num_base_address = ip->num_base_address;
@@ -1340,7 +1392,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
*/
if (adev->vcn.num_vcn_inst <
AMDGPU_MAX_VCN_INSTANCES) {
- adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
+ adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config =
ip->revision & 0xc0;
adev->vcn.num_vcn_inst++;
adev->vcn.inst_mask |=
@@ -1388,6 +1440,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
adev->gfx.xcc_mask |=
(1U << ip->instance_number);
+ if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+ wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+ ip->revision, 0, 0);
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
@@ -1453,22 +1509,32 @@ next_ip:
}
}
+ if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+ adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
return 0;
}
static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
+ struct ip_discovery_header *ihdr;
+ struct binary_header *bhdr;
int vcn_harvest_count = 0;
int umc_harvest_count = 0;
+ uint16_t offset, ihdr_ver;
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset);
+ ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+ offset);
+ ihdr_ver = le16_to_cpu(ihdr->version);
/*
* Harvest table does not fit Navi1x and legacy GPUs,
* so read harvest bit per IP data structure to set
* harvest configuration.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) {
+ ihdr_ver <= 2) {
if ((adev->pdev->device == 0x731E &&
(adev->pdev->revision == 0xC6 ||
adev->pdev->revision == 0xC7)) ||
@@ -1705,7 +1771,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
* so this won't overflow.
*/
for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
- adev->vcn.vcn_codec_disable_mask[v] =
+ adev->vcn.inst[v].vcn_codec_disable_mask =
le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
}
break;
@@ -1723,45 +1789,85 @@ union nps_info {
struct nps_info_v1_0 v1;
};
+static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
+ union nps_info *nps_data)
+{
+ uint64_t vram_size, pos, offset;
+ struct nps_info_header *nhdr;
+ struct binary_header bhdr;
+ uint16_t checksum;
+
+ vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ pos = vram_size - DISCOVERY_TMR_OFFSET;
+ amdgpu_device_vram_access(adev, pos, &bhdr, sizeof(bhdr), false);
+
+ offset = le16_to_cpu(bhdr.table_list[NPS_INFO].offset);
+ checksum = le16_to_cpu(bhdr.table_list[NPS_INFO].checksum);
+
+ amdgpu_device_vram_access(adev, (pos + offset), nps_data,
+ sizeof(*nps_data), false);
+
+ nhdr = (struct nps_info_header *)(nps_data);
+ if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
+ le32_to_cpu(nhdr->size_bytes),
+ checksum)) {
+ dev_err(adev->dev, "nps data refresh, checksum mismatch\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
struct amdgpu_gmc_memrange **ranges,
- int *range_cnt)
+ int *range_cnt, bool refresh)
{
struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
union nps_info *nps_info;
+ union nps_info nps_data;
u16 offset;
- int i;
+ int i, r;
if (!nps_type || !range_cnt || !ranges)
return -EINVAL;
- if (!adev->mman.discovery_bin) {
- dev_err(adev->dev,
- "fetch mem range failed, ip discovery uninitialized\n");
- return -EINVAL;
- }
+ if (refresh) {
+ r = amdgpu_discovery_refresh_nps_info(adev, &nps_data);
+ if (r)
+ return r;
+ nps_info = &nps_data;
+ } else {
+ if (!adev->mman.discovery_bin) {
+ dev_err(adev->dev,
+ "fetch mem range failed, ip discovery uninitialized\n");
+ return -EINVAL;
+ }
- bhdr = (struct binary_header *)adev->mman.discovery_bin;
- offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
- if (!offset)
- return -ENOENT;
+ if (!offset)
+ return -ENOENT;
- /* If verification fails, return as if NPS table doesn't exist */
- if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
- return -ENOENT;
+ /* If verification fails, return as if NPS table doesn't exist */
+ if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
+ return -ENOENT;
- nps_info = (union nps_info *)(adev->mman.discovery_bin + offset);
+ nps_info =
+ (union nps_info *)(adev->mman.discovery_bin + offset);
+ }
switch (le16_to_cpu(nps_info->v1.header.version_major)) {
case 1:
+ mem_ranges = kvcalloc(nps_info->v1.count,
+ sizeof(*mem_ranges),
+ GFP_KERNEL);
+ if (!mem_ranges)
+ return -ENOMEM;
*nps_type = nps_info->v1.nps_type;
*range_cnt = nps_info->v1.count;
- mem_ranges = kvzalloc(
- *range_cnt * sizeof(struct amdgpu_gmc_memrange),
- GFP_KERNEL);
for (i = 0; i < *range_cnt; i++) {
mem_ranges[i].base_address =
nps_info->v1.instance_info[i].base_address;
@@ -1796,6 +1902,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1821,6 +1928,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -1850,6 +1958,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -1875,6 +1984,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -1954,6 +2064,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
break;
case IP_VERSION(11, 0, 8):
@@ -1973,6 +2084,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
@@ -1984,6 +2096,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(14, 0, 2):
case IP_VERSION(14, 0, 3):
+ case IP_VERSION(14, 0, 5):
amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block);
break;
default:
@@ -2016,6 +2129,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -2034,6 +2148,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
@@ -2041,6 +2156,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 2):
case IP_VERSION(14, 0, 3):
case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
@@ -2092,6 +2208,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 1):
case IP_VERSION(3, 5, 0):
case IP_VERSION(3, 5, 1):
+ case IP_VERSION(3, 6, 0):
case IP_VERSION(4, 1, 0):
/* TODO: Fix IP version. DC code expects version 4.0.1 */
if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
@@ -2144,6 +2261,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
break;
case IP_VERSION(10, 1, 10):
@@ -2169,6 +2287,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
@@ -2198,6 +2317,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
+ case IP_VERSION(4, 4, 4):
amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
break;
case IP_VERSION(5, 0, 0):
@@ -2223,6 +2343,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 1, 0):
case IP_VERSION(6, 1, 1):
case IP_VERSION(6, 1, 2):
+ case IP_VERSION(6, 1, 3):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
case IP_VERSION(7, 0, 0):
@@ -2321,6 +2442,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
break;
+ case IP_VERSION(5, 0, 1):
+ amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n",
@@ -2342,6 +2467,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
@@ -2365,6 +2491,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
aqua_vanjaram_init_soc_config(adev);
break;
default:
@@ -2428,6 +2555,40 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_RAVEN:
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
+ /* this is not fatal. We have a fallback below
+ * if the new firmwares are not present. some of
+ * this will be overridden below to keep things
+ * consistent with the current behavior.
+ */
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (!r) {
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ }
+ break;
+ default:
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ drm_err(&adev->ddev, "discovery failed: %d\n", r);
+ return r;
+ }
+
+ amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
+ break;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
adev->gmc.num_umc = 4;
@@ -2492,6 +2653,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 2, 2);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 1);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 1);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
} else {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 1, 0);
@@ -2508,6 +2670,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[GC_HWIP][0] = IP_VERSION(9, 1, 0);
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(1, 0, 0);
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(1, 0, 0);
+ adev->ip_versions[ISP_HWIP][0] = IP_VERSION(2, 0, 0);
}
break;
case CHIP_VEGA20:
@@ -2588,14 +2751,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
break;
default:
- r = amdgpu_discovery_reg_base_init(adev);
- if (r)
- return -EINVAL;
-
- amdgpu_discovery_harvest_ip(adev);
- amdgpu_discovery_get_gfx_info(adev);
- amdgpu_discovery_get_mall_info(adev);
- amdgpu_discovery_get_vcn_info(adev);
break;
}
@@ -2610,6 +2765,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->family = AMDGPU_FAMILY_AI;
break;
case IP_VERSION(9, 1, 0):
@@ -2653,6 +2809,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->family = AMDGPU_FAMILY_GC_11_5_0;
break;
case IP_VERSION(12, 0, 0):
@@ -2678,19 +2835,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->flags |= AMD_IS_APU;
break;
default:
break;
}
- if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0))
- adev->gmc.xgmi.supported = true;
-
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
- adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0);
-
/* set NBIO version */
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 1, 0):
@@ -2711,11 +2862,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
adev->nbio.funcs = &nbio_v7_9_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg;
break;
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
case IP_VERSION(7, 11, 3):
adev->nbio.funcs = &nbio_v7_11_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg;
@@ -2843,6 +2996,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 10):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 9):
case IP_VERSION(13, 0, 10):
@@ -2852,6 +3006,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->smuio.funcs = &smuio_v13_0_funcs;
break;
case IP_VERSION(13, 0, 3):
+ case IP_VERSION(13, 0, 11):
adev->smuio.funcs = &smuio_v13_0_3_funcs;
if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) {
adev->flags |= AMD_IS_APU;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index f5d36525ec3e..b44d56465c5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -33,6 +33,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
struct amdgpu_gmc_memrange **ranges,
- int *range_cnt);
+ int *range_cnt, bool refresh);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b119d27271c1..35c778426a7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -33,6 +33,7 @@
#include "soc15_common.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
+#include "bif/bif_4_1_d.h"
#include <asm/div64.h>
#include <linux/pci.h>
@@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
return 0;
}
+/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel()
+ * they are called from the panic handler, and protected by the drm_panic spinlock.
+ */
+static struct amdgpu_bo *panic_abo;
+
+/* Use the indirect MMIO to write each pixel to the GPU VRAM,
+ * This is a simplified version of amdgpu_device_mm_access()
+ */
+static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb,
+ unsigned int x,
+ unsigned int y,
+ u32 color)
+{
+ struct amdgpu_res_cursor cursor;
+ unsigned long offset;
+ struct amdgpu_bo *abo = panic_abo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+ uint32_t tmp;
+
+ offset = x * 4 + y * sb->pitch[0];
+ amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor);
+
+ tmp = cursor.start >> 31;
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000);
+ if (tmp != 0xffffffff)
+ WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
+ WREG32_NO_KIQ(mmMM_DATA, color);
+}
+
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb)
+{
+ struct amdgpu_bo *abo;
+ struct drm_framebuffer *fb = plane->state->fb;
+
+ if (!fb)
+ return -EINVAL;
+
+ DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format);
+
+ abo = gem_to_amdgpu_bo(fb->obj[0]);
+ if (!abo)
+ return -EINVAL;
+
+ sb->width = fb->width;
+ sb->height = fb->height;
+ /* Use the generic linear format, because tiling will be disabled in panic_flush() */
+ sb->format = drm_format_info(fb->format->format);
+ if (!sb->format)
+ return -EINVAL;
+
+ sb->pitch[0] = fb->pitches[0];
+
+ if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) {
+ if (abo->tbo.resource->mem_type != TTM_PL_VRAM) {
+ drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n");
+ return -EINVAL;
+ }
+ /* Only handle 32bits format, to simplify mmio access */
+ if (fb->format->cpp[0] != 4) {
+ drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n");
+ return -EINVAL;
+ }
+ sb->set_pixel = amdgpu_display_set_pixel;
+ panic_abo = abo;
+ return 0;
+ }
+ if (!abo->kmap.virtual &&
+ ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) {
+ drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n");
+ return -ENOMEM;
+ }
+ if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+ iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual);
+ else
+ iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 9d19940f73c8..dfa0d642ac16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#include <drm/drm_panic.h>
+
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
@@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier);
int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+int amdgpu_display_get_scanout_buffer(struct drm_plane *plane,
+ struct drm_scanout_buffer *sb);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 8e81a83d37d8..44e120f9f764 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -36,12 +36,36 @@
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
#include <drm/amdgpu_drm.h>
#include <drm/ttm/ttm_tt.h>
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
+
+/**
+ * dma_buf_attach_adev - Helper to get adev of an attachment
+ *
+ * @attach: attachment
+ *
+ * Returns:
+ * A struct amdgpu_device * if the attaching device is an amdgpu device or
+ * partition, NULL otherwise.
+ */
+static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
+{
+ if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
+ struct drm_gem_object *obj = attach->importer_priv;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ return amdgpu_ttm_adev(bo->tbo.bdev);
+ }
+
+ return NULL;
+}
+
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
@@ -53,13 +77,17 @@
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attach)
{
+ struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
+ if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
+ pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
+ amdgpu_vm_bo_update_shared(bo);
+
return 0;
}
@@ -72,11 +100,35 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
*/
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
+ u32 domains = bo->allowed_domains;
+
+ dma_resv_assert_held(dmabuf->resv);
+
+ /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
+ * support if all attachments can do P2P. If any attachment can't do
+ * P2P just pin into GTT instead.
+ *
+ * To avoid with conflicting pinnings between GPUs and RDMA when move
+ * notifiers are disabled, only allow pinning in VRAM when move
+ * notiers are enabled.
+ */
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node)
+ if (!attach->peer2peer)
+ domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
+ }
- /* pin buffer into GTT */
- return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (domains & AMDGPU_GEM_DOMAIN_VRAM)
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ if (WARN_ON(!domains))
+ return -EINVAL;
+
+ return amdgpu_bo_pin(bo, domains);
}
/**
@@ -131,9 +183,6 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return ERR_PTR(r);
-
- } else if (bo->tbo.resource->mem_type != TTM_PL_TT) {
- return ERR_PTR(-EBUSY);
}
switch (bo->tbo.resource->mem_type) {
@@ -150,6 +199,11 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
break;
case TTM_PL_VRAM:
+ /* XGMI-accessible memory should never be DMA-mapped */
+ if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
+ dma_buf_attach_adev(attach), bo)))
+ return ERR_PTR(-EINVAL);
+
r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
bo->tbo.base.size, attach->dev,
dir, &sgt);
@@ -181,7 +235,7 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- if (sgt->sgl->page_link) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
@@ -345,7 +399,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
/* FIXME: This should be after the "if", but needs a fix to make sure
* DMABuf imports are initialized in the right VM list.
*/
- amdgpu_vm_bo_invalidate(adev, bo, false);
+ amdgpu_vm_bo_invalidate(bo, false);
if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
return;
@@ -456,6 +510,9 @@ bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
struct drm_gem_object *obj = &bo->tbo.base;
struct drm_gem_object *gobj;
+ if (!adev)
+ return false;
+
if (obj->import_attach) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 81d9877c8735..4db92e0a60da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,6 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/clients/drm_client_setup.h>
#include <drm/drm_drv.h>
#include <drm/drm_fbdev_ttm.h>
#include <drm/drm_gem.h>
@@ -50,6 +51,8 @@
#include "amdgpu_reset.h"
#include "amdgpu_sched.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
#include "../amdxcp/amdgpu_xcp_drv.h"
/*
@@ -118,9 +121,14 @@
* - 3.57.0 - Compute tunneling on GFX10+
* - 3.58.0 - Add GFX12 DCC support
* - 3.59.0 - Cleared VRAM
+ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
+ * - 3.61.0 - Contains fix for RV/PCO compute queues
+ * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
+ * - 3.64.0 - Userq IP support query
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 59
+#define KMS_DRIVER_MINOR 64
#define KMS_DRIVER_PATCHLEVEL 0
/*
@@ -133,6 +141,9 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+ AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+ AMDGPU_DEBUG_SMU_POOL = BIT(7),
+ AMDGPU_DEBUG_VM_USERPTR = BIT(8),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -169,7 +180,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu;
char *amdgpu_virtual_display;
-bool enforce_isolation;
+int amdgpu_enforce_isolation = -1;
+int amdgpu_modeset = -1;
/* Specifies the default granularity for SVM, used in buffer
* migration and restoration of backing memory when handling
@@ -230,8 +242,8 @@ int amdgpu_agp = -1; /* auto */
int amdgpu_wbrf = -1;
int amdgpu_damage_clips = -1; /* auto */
int amdgpu_umsch_mm_fwlog;
-
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+int amdgpu_rebar = -1; /* auto */
+int amdgpu_user_queue = -1;
DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
"DRM_UT_CORE",
@@ -247,9 +259,6 @@ DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
- .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
- mgpu_info.delayed_reset_work,
- amdgpu_drv_delayed_reset_work_handler, 0),
};
int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff;
@@ -284,7 +293,8 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
* Restrict the size of GTT domain (for userspace use) in MiB for testing.
- * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
+ * The default is -1 (Use value specified by TTM).
+ * This parameter is deprecated and will be removed in the future.
*/
MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
@@ -403,7 +413,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
* the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
*/
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
-module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
+module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
/**
* DOC: bapm (int)
@@ -461,7 +471,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
-module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
/**
* DOC: dc (int)
@@ -572,14 +582,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
-module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
* DOC: emu_mode (int)
* Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
*/
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
-module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444);
/**
* DOC: ras_enable (int)
@@ -734,7 +744,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644);
*/
MODULE_PARM_DESC(force_asic_type,
"A non negative value used to specify the asic type for all supported GPUs");
-module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444);
+module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444);
/**
* DOC: use_xgmi_p2p (int)
@@ -753,7 +763,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444);
* assigns queues to HQDs.
*/
int sched_policy = KFD_SCHED_POLICY_HWS;
-module_param(sched_policy, int, 0444);
+module_param_unsafe(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
@@ -803,7 +813,7 @@ MODULE_PARM_DESC(send_sigterm,
* Setting 1 enables halt on hang.
*/
int halt_if_hws_hang;
-module_param(halt_if_hws_hang, int, 0644);
+module_param_unsafe(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
@@ -812,7 +822,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
* check says. Default value: false (rely on MEC2 firmware version check).
*/
bool hws_gws_support;
-module_param(hws_gws_support, bool, 0444);
+module_param_unsafe(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
/**
@@ -845,7 +855,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa
*/
int amdgpu_no_queue_eviction_on_vm_fault;
MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
-module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
+module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
@@ -853,7 +863,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
*/
int amdgpu_mtype_local;
MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)");
-module_param_named(mtype_local, amdgpu_mtype_local, int, 0444);
+module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444);
/**
* DOC: pcie_p2p (bool)
@@ -892,7 +902,7 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
* the ABM algorithm, with 1 being the least reduction and 4 being the most
* reduction.
*
- * Defaults to -1, or disabled. Userspace can only override this level after
+ * Defaults to -1, or auto. Userspace can only override this level after
* boot if it's set to auto.
*/
int amdgpu_dm_abm_level = -1;
@@ -957,7 +967,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0644);
+module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
/**
* DOC: bad_page_threshold (int) Bad page threshold is specifies the
@@ -965,7 +975,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0644);
* result in the GPU entering bad status when the number of total
* faulty pages by ECC exceeds the threshold value.
*/
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
@@ -1029,11 +1039,20 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
/**
- * DOC: enforce_isolation (bool)
- * enforce process isolation between graphics and compute via using the same reserved vmid.
+ * DOC: enforce_isolation (int)
+ * enforce process isolation between graphics and compute.
+ * (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)
+ */
+module_param_named(enforce_isolation, amdgpu_enforce_isolation, int, 0444);
+MODULE_PARM_DESC(enforce_isolation,
+"enforce process isolation between graphics and compute. (-1 = auto, 0 = disable, 1 = enable, 2 = enable legacy mode, 3 = enable without cleaner shader)");
+
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
*/
-module_param(enforce_isolation, bool, 0444);
-MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
/**
* DOC: seamless (int)
@@ -1051,9 +1070,14 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
* limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
*/
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
-module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
/**
* DOC: agp (int)
@@ -1080,6 +1104,28 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
module_param_named(wbrf, amdgpu_wbrf, int, 0444);
+/**
+ * DOC: rebar (int)
+ * Allow BAR resizing. Disable this to prevent the driver from attempting
+ * to resize the BAR if the GPU supports it and there is available MMIO space.
+ * Note that this just prevents the driver from resizing the BAR. The BIOS
+ * may have already resized the BAR at boot time.
+ */
+MODULE_PARM_DESC(rebar, "Resizable BAR (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(rebar, amdgpu_rebar, int, 0444);
+
+/**
+ * DOC: user_queue (int)
+ * Enable user queues on systems that support user queues. Possible values:
+ *
+ * - -1 = auto (ASIC specific default)
+ * - 0 = user queues disabled
+ * - 1 = user queues enabled and kernel queues enabled (if supported)
+ * - 2 = user queues enabled and kernel queues disabled
+ */
+MODULE_PARM_DESC(user_queue, "Enable user queues (-1 = auto (default), 0 = disable, 1 = enable, 2 = enable UQs and disable KQs)");
+module_param_named(user_queue, amdgpu_user_queue, int, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -1793,7 +1839,6 @@ static const u16 amdgpu_unsupported_pciidlist[] = {
};
static const struct pci_device_id pciidlist[] = {
-#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6784, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
{0x1002, 0x6788, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1866,8 +1911,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6665, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x6667, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
{0x1002, 0x666F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HAINAN|AMD_IS_MOBILITY},
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_APU},
@@ -1950,7 +1993,6 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x985D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
{0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU},
-#endif
/* topaz */
{0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
{0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ},
@@ -2223,6 +2265,19 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enable experimental reset features\n");
adev->debug_exp_resets = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+ pr_info("debug: ring reset disabled\n");
+ adev->debug_disable_gpu_ring_reset = true;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+ pr_info("debug: use vram for smu pool\n");
+ adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+ }
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM_USERPTR) {
+ pr_info("debug: VM mode debug for userptr is enabled\n");
+ adev->debug_vm_userptr = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2250,6 +2305,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
int ret, retry = 0, i;
bool supports_atomic = false;
+ if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+ (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+ if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+ return -EINVAL;
+ }
+
/* skip devices which are owned by radeon */
for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
if (amdgpu_unsupported_pciidlist[i] == pdev->device)
@@ -2282,14 +2343,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENOTSUPP;
}
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
+ if (!amdgpu_si_support) {
dev_info(&pdev->dev,
"SI support provided by radeon.\n");
dev_info(&pdev->dev,
@@ -2297,16 +2358,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
);
return -ENODEV;
}
- }
+ break;
+#else
+ dev_info(&pdev->dev, "amdgpu is built without SI support.\n");
+ return -ENODEV;
#endif
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
+ if (!amdgpu_cik_support) {
dev_info(&pdev->dev,
"CIK support provided by radeon.\n");
dev_info(&pdev->dev,
@@ -2314,8 +2377,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
);
return -ENODEV;
}
- }
+ break;
+#else
+ dev_info(&pdev->dev, "amdgpu is built without CIK support.\n");
+ return -ENODEV;
#endif
+ default:
+ break;
+ }
adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
if (IS_ERR(adev))
@@ -2365,11 +2434,15 @@ retry_init:
*/
if (adev->mode_info.mode_config_initialized &&
!list_empty(&adev_to_drm(adev)->mode_config.connector_list)) {
+ const struct drm_format_info *format;
+
/* select 8 bpp console on low vram cards */
if (adev->gmc.real_vram_size <= (32*1024*1024))
- drm_fbdev_ttm_setup(adev_to_drm(adev), 8);
+ format = drm_format_info(DRM_FORMAT_C8);
else
- drm_fbdev_ttm_setup(adev_to_drm(adev), 32);
+ format = NULL;
+
+ drm_client_setup(adev_to_drm(adev), format);
}
ret = amdgpu_debugfs_init(adev);
@@ -2434,6 +2507,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct amdgpu_device *adev = drm_to_adev(dev);
amdgpu_xcp_dev_unplug(adev);
+ amdgpu_gmc_prepare_nps_mode_change(adev);
drm_dev_unplug(dev);
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
@@ -2472,82 +2546,6 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
adev->mp1_state = PP_MP1_STATE_NONE;
}
-/**
- * amdgpu_drv_delayed_reset_work_handler - work handler for reset
- *
- * @work: work_struct.
- */
-static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
-{
- struct list_head device_list;
- struct amdgpu_device *adev;
- int i, r;
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
-
- mutex_lock(&mgpu_info.mutex);
- if (mgpu_info.pending_reset == true) {
- mutex_unlock(&mgpu_info.mutex);
- return;
- }
- mgpu_info.pending_reset = true;
- mutex_unlock(&mgpu_info.mutex);
-
- /* Use a common context, just need to make sure full reset is done */
- reset_context.method = AMD_RESET_METHOD_NONE;
- set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- reset_context.reset_req_dev = adev;
- r = amdgpu_device_pre_asic_reset(adev, &reset_context);
- if (r) {
- dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
- r, adev_to_drm(adev)->unique);
- }
- if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
- r = -EALREADY;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- flush_work(&adev->xgmi_reset_work);
- adev->gmc.xgmi.pending_reset = false;
- }
-
- /* reset function will rebuild the xgmi hive info , clear it now */
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
-
- INIT_LIST_HEAD(&device_list);
-
- for (i = 0; i < mgpu_info.num_dgpu; i++)
- list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);
-
- /* unregister the GPU first, reset function will add them back */
- list_for_each_entry(adev, &device_list, reset_list)
- amdgpu_unregister_gpu_instance(adev);
-
- /* Use a common context, just need to make sure full reset is done */
- set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
- set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
- r = amdgpu_do_asic_reset(&device_list, &reset_context);
-
- if (r) {
- DRM_ERROR("reinit gpus failure");
- return;
- }
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- adev = mgpu_info.gpu_ins[i].adev;
- if (!adev->kfd.init_complete) {
- kgd2kfd_init_zone_device(adev);
- amdgpu_amdkfd_device_init(adev);
- amdgpu_amdkfd_drm_client_create(adev);
- }
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
- }
-}
-
static int amdgpu_pmops_prepare(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
@@ -2580,13 +2578,24 @@ static int amdgpu_pmops_suspend(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- adev->suspend_complete = false;
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = true;
else if (amdgpu_acpi_is_s3_active(adev))
adev->in_s3 = true;
- if (!adev->in_s0ix && !adev->in_s3)
+ if (!adev->in_s0ix && !adev->in_s3) {
+ /* don't allow going deep first time followed by s2idle the next time */
+ if (adev->last_suspend_state != PM_SUSPEND_ON &&
+ adev->last_suspend_state != pm_suspend_target_state) {
+ drm_err_once(drm_dev, "Unsupported suspend state %d\n",
+ pm_suspend_target_state);
+ return -EINVAL;
+ }
return 0;
+ }
+
+ /* cache the state last used for suspend */
+ adev->last_suspend_state = pm_suspend_target_state;
+
return amdgpu_device_suspend(drm_dev, true);
}
@@ -2595,7 +2604,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev)
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- adev->suspend_complete = true;
if (amdgpu_acpi_should_gpu_reset(adev))
return amdgpu_asic_reset(adev);
@@ -2629,9 +2637,7 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_s4 = false;
if (r)
return r;
@@ -2728,6 +2734,29 @@ static int amdgpu_runtime_idle_check_display(struct device *dev)
return 0;
}
+static int amdgpu_runtime_idle_check_userq(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret = 0;
+
+ mutex_lock(&adev->userq_mutex);
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+ ret = -EBUSY;
+ goto done;
+ }
+ }
+done:
+ mutex_unlock(&adev->userq_mutex);
+
+ return ret;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2743,6 +2772,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
ret = amdgpu_runtime_idle_check_display(dev);
if (ret)
return ret;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+ if (ret)
+ return ret;
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -2864,12 +2896,30 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
}
ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ goto done;
+ ret = amdgpu_runtime_idle_check_userq(dev);
+done:
pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
return ret;
}
+static int amdgpu_drm_release(struct inode *inode, struct file *filp)
+{
+ struct drm_file *file_priv = filp->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+
+ if (fpriv) {
+ fpriv->evf_mgr.fd_closing = true;
+ amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+ amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
+ }
+
+ return drm_release(inode, filp);
+}
+
long amdgpu_drm_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
{
@@ -2921,7 +2971,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.flush = amdgpu_flush,
- .release = drm_release,
+ .release = amdgpu_drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = drm_gem_mmap,
.poll = drm_poll,
@@ -2968,6 +3018,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
};
static const struct drm_driver amdgpu_kms_driver = {
@@ -2982,6 +3035,7 @@ static const struct drm_driver amdgpu_kms_driver = {
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
#ifdef CONFIG_PROC_FS
@@ -2992,7 +3046,6 @@ static const struct drm_driver amdgpu_kms_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -3008,6 +3061,7 @@ const struct drm_driver amdgpu_partition_driver = {
.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
+ DRM_FBDEV_TTM_DRIVER_OPS,
.fops = &amdgpu_driver_kms_fops,
.release = &amdgpu_driver_release_kms,
@@ -3015,7 +3069,6 @@ const struct drm_driver amdgpu_partition_driver = {
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
- .date = DRIVER_DATE,
.major = KMS_DRIVER_MAJOR,
.minor = KMS_DRIVER_MINOR,
.patchlevel = KMS_DRIVER_PATCHLEVEL,
@@ -3050,9 +3103,6 @@ static int __init amdgpu_init(void)
{
int r;
- if (drm_firmware_drivers_only())
- return -EINVAL;
-
r = amdgpu_sync_init();
if (r)
goto error_sync;
@@ -3061,6 +3111,10 @@ static int __init amdgpu_init(void)
if (r)
goto error_fence;
+ r = amdgpu_userq_fence_slab_init();
+ if (r)
+ goto error_fence;
+
DRM_INFO("amdgpu kernel modesetting enabled.\n");
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();
@@ -3068,6 +3122,12 @@ static int __init amdgpu_init(void)
/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
amdgpu_amdkfd_init();
+ if (amdgpu_pp_feature_mask & PP_OVERDRIVE_MASK) {
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ pr_crit("Overdrive is enabled, please disable it before "
+ "reporting any bugs unrelated to overdrive.\n");
+ }
+
/* let modprobe override vga console setting */
return pci_register_driver(&amdgpu_kms_pci_driver);
@@ -3086,6 +3146,7 @@ static void __exit amdgpu_exit(void)
amdgpu_acpi_release();
amdgpu_sync_fini();
amdgpu_fence_slab_fini();
+ amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
amdgpu_xcp_drv_release();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
index 5bc2cb661af7..2d86cc6f7f4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h
@@ -40,7 +40,6 @@
#define DRIVER_NAME "amdgpu"
#define DRIVER_DESC "AMD GPU"
-#define DRIVER_DATE "20150101"
extern const struct drm_driver amdgpu_partition_driver;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
index 35fee3e8cde2..8cd69836dd99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -200,7 +200,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
dev_err_ratelimited(&i2c_adap->dev,
"maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d",
eeprom_addr, buf_size,
- read ? "read" : "write", EEPROM_OFFSET_SIZE);
+ str_read_write(read), EEPROM_OFFSET_SIZE);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..8b919ad3af29
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include <drm/drm_exec.h>
+#include "amdgpu.h"
+
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu_eviction_fence";
+}
+
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence *ef;
+
+ ef = container_of(f, struct amdgpu_eviction_fence, base);
+ return ef->timeline_name;
+}
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec)
+{
+ struct amdgpu_eviction_fence *old_ef, *new_ef;
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ if (evf_mgr->ev_fence &&
+ !dma_fence_is_signaled(&evf_mgr->ev_fence->base))
+ return 0;
+ /*
+ * Steps to replace eviction fence:
+ * * lock all objects in exec (caller)
+ * * create a new eviction fence
+ * * update new eviction fence in evf_mgr
+ * * attach the new eviction fence to BOs
+ * * release the old fence
+ * * unlock the objects (caller)
+ */
+ new_ef = amdgpu_eviction_fence_create(evf_mgr);
+ if (!new_ef) {
+ DRM_ERROR("Failed to create new eviction fence\n");
+ return -ENOMEM;
+ }
+
+ /* Update the eviction fence now */
+ spin_lock(&evf_mgr->ev_fence_lock);
+ old_ef = evf_mgr->ev_fence;
+ evf_mgr->ev_fence = new_ef;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ /* Attach the new fence */
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+ if (!bo)
+ continue;
+ ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+ if (ret) {
+ DRM_ERROR("Failed to attch new eviction fence\n");
+ goto free_err;
+ }
+ }
+
+ /* Free old fence */
+ if (old_ef)
+ dma_fence_put(&old_ef->base);
+ return 0;
+
+free_err:
+ kfree(new_ef);
+ return ret;
+}
+
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+ struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_fence_get(&ev_fence->base);
+ else
+ goto unlock;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ amdgpu_userq_evict(uq_mgr, ev_fence);
+
+ mutex_unlock(&uq_mgr->userq_mutex);
+ dma_fence_put(&ev_fence->base);
+ return;
+
+unlock:
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+ struct amdgpu_eviction_fence *ev_fence;
+
+ if (!f)
+ return true;
+
+ ev_fence = to_ev_fence(f);
+ evf_mgr = ev_fence->evf_mgr;
+
+ schedule_delayed_work(&evf_mgr->suspend_work, 0);
+ return true;
+}
+
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+ .use_64bit_seqno = true,
+ .get_driver_name = amdgpu_eviction_fence_get_driver_name,
+ .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+ .enable_signaling = amdgpu_eviction_fence_enable_signaling,
+};
+
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ spin_lock(&evf_mgr->ev_fence_lock);
+ dma_fence_signal(&ev_fence->base);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
+ if (!ev_fence)
+ return NULL;
+
+ ev_fence->evf_mgr = evf_mgr;
+ get_task_comm(ev_fence->timeline_name, current);
+ spin_lock_init(&ev_fence->lock);
+ dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops,
+ &ev_fence->lock, evf_mgr->ev_fence_ctx,
+ atomic_inc_return(&evf_mgr->ev_fence_seq));
+ return ev_fence;
+}
+
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+ /* Wait for any pending work to execute */
+ flush_delayed_work(&evf_mgr->suspend_work);
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ if (!ev_fence)
+ return;
+
+ dma_fence_wait(&ev_fence->base, false);
+
+ /* Last unref of ev_fence */
+ dma_fence_put(&ev_fence->base);
+}
+
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+ struct dma_resv *resv = bo->tbo.base.resv;
+ int ret;
+
+ if (!resv)
+ return 0;
+
+ ret = dma_resv_reserve_fences(resv, 1);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to resv fence space\n");
+ return ret;
+ }
+
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ if (ev_fence)
+ dma_resv_add_fence(resv, &ev_fence->base, DMA_RESV_USAGE_BOOKKEEP);
+ spin_unlock(&evf_mgr->ev_fence_lock);
+
+ return 0;
+}
+
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo)
+{
+ struct dma_fence *stub = dma_fence_get_stub();
+
+ dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
+ stub, DMA_RESV_USAGE_BOOKKEEP);
+ dma_fence_put(stub);
+}
+
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ /* This needs to be done one time per open */
+ atomic_set(&evf_mgr->ev_fence_seq, 0);
+ evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+ spin_lock_init(&evf_mgr->ev_fence_lock);
+
+ INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..fcd867b7147d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+struct amdgpu_eviction_fence {
+ struct dma_fence base;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+ struct amdgpu_eviction_fence_mgr *evf_mgr;
+};
+
+struct amdgpu_eviction_fence_mgr {
+ u64 ev_fence_ctx;
+ atomic_t ev_fence_seq;
+ spinlock_t ev_fence_lock;
+ struct amdgpu_eviction_fence *ev_fence;
+ struct delayed_work suspend_work;
+ uint8_t fd_closing;
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+ struct drm_exec *exec);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
index c7df7fa3459f..91d638098889 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -59,20 +60,20 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
struct amdgpu_fpriv *fpriv = file->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_mem_stats stats;
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
ktime_t usage[AMDGPU_HW_IP_NUM];
- unsigned int hw_ip;
- int ret;
-
- memset(&stats, 0, sizeof(stats));
-
- ret = amdgpu_bo_reserve(vm->root.bo, false);
- if (ret)
- return;
-
- amdgpu_vm_get_memory(vm, &stats);
- amdgpu_bo_unreserve(vm->root.bo);
+ const char *pl_name[] = {
+ [TTM_PL_VRAM] = "vram",
+ [TTM_PL_TT] = "gtt",
+ [TTM_PL_SYSTEM] = "cpu",
+ [AMDGPU_PL_GDS] = "gds",
+ [AMDGPU_PL_GWS] = "gws",
+ [AMDGPU_PL_OA] = "oa",
+ [AMDGPU_PL_DOORBELL] = "doorbell",
+ };
+ unsigned int hw_ip, i;
+ amdgpu_vm_get_memory(vm, stats);
amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage);
/*
@@ -82,24 +83,35 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
*/
drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid);
- drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL);
- drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL);
- drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL);
- drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n",
- stats.visible_vram/1024UL);
+
+ for (i = 0; i < ARRAY_SIZE(pl_name); i++) {
+ if (!pl_name[i])
+ continue;
+
+ drm_print_memory_stats(p,
+ &stats[i].drm,
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ pl_name[i]);
+ }
+
+ /* Legacy amdgpu keys, alias to drm-resident-memory-: */
+ drm_printf(p, "drm-memory-vram:\t%llu KiB\n",
+ stats[TTM_PL_VRAM].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-gtt: \t%llu KiB\n",
+ stats[TTM_PL_TT].drm.resident/1024UL);
+ drm_printf(p, "drm-memory-cpu: \t%llu KiB\n",
+ stats[TTM_PL_SYSTEM].drm.resident/1024UL);
+
+ /* Amdgpu specific memory accounting keys: */
drm_printf(p, "amd-evicted-vram:\t%llu KiB\n",
- stats.evicted_vram/1024UL);
- drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n",
- stats.evicted_visible_vram/1024UL);
+ stats[TTM_PL_VRAM].evicted/1024UL);
drm_printf(p, "amd-requested-vram:\t%llu KiB\n",
- stats.requested_vram/1024UL);
- drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n",
- stats.requested_visible_vram/1024UL);
+ (stats[TTM_PL_VRAM].drm.shared +
+ stats[TTM_PL_VRAM].drm.private) / 1024UL);
drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
- stats.requested_gtt/1024UL);
- drm_printf(p, "drm-shared-vram:\t%llu KiB\n", stats.vram_shared/1024UL);
- drm_printf(p, "drm-shared-gtt:\t%llu KiB\n", stats.gtt_shared/1024UL);
- drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL);
+ (stats[TTM_PL_TT].drm.shared +
+ stats[TTM_PL_TT].drm.private) / 1024UL);
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 2f24a6aa13bf..5fec808d7f54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -41,22 +41,6 @@
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
-/*
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization. When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
- struct dma_fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- ktime_t start_timestamp;
-};
-
static struct kmem_cache *amdgpu_fence_slab;
int amdgpu_fence_slab_init(void)
@@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
if (am_fence == NULL)
return -ENOMEM;
- fence = &am_fence->base;
- am_fence->ring = ring;
} else {
/* take use of job-embedded fence */
- fence = &job->hw_fence;
+ am_fence = &job->hw_fence;
}
+ fence = &am_fence->base;
+ am_fence->ring = ring;
seq = ++ring->fence_drv.sync_seq;
if (job && job->job_run_counter) {
@@ -280,7 +264,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (del_timer(&ring->fence_drv.fallback_timer) &&
+ if (timer_delete(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
@@ -322,8 +306,8 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
- struct amdgpu_ring *ring = from_timer(ring, t,
- fence_drv.fallback_timer);
+ struct amdgpu_ring *ring = timer_container_of(ring, t,
+ fence_drv.fallback_timer);
if (amdgpu_fence_process(ring))
DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
@@ -618,7 +602,7 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
- del_timer_sync(&ring->fence_drv.fallback_timer);
+ timer_delete_sync(&ring->fence_drv.fallback_timer);
}
}
@@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
* it right here or we won't be able to track them in fence_drv
* and they will remain unsignaled during sa_bo free.
*/
- job = container_of(old, struct amdgpu_job, hw_fence);
+ job = container_of(old, struct amdgpu_job, hw_fence.base);
if (!job->base.s_fence && !dma_fence_is_signaled(old))
dma_fence_signal(old);
RCU_INIT_POINTER(*ptr, NULL);
@@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
return (const char *)to_amdgpu_ring(job->base.sched)->name;
}
@@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
*/
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
- struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+ struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
@@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu)
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
/* free job if fence has a parent job */
- kfree(container_of(f, struct amdgpu_job, hw_fence));
+ kfree(container_of(f, struct amdgpu_job, hw_fence.base));
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index ceb5163480f4..1ae88c459da5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -31,6 +31,7 @@
#define FRU_EEPROM_MADDR_6 0x60000
#define FRU_EEPROM_MADDR_8 0x80000
+#define FRU_EEPROM_MADDR_INV 0xFFFFF
static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
{
@@ -63,10 +64,10 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
switch (adev->asic_type) {
case CHIP_VEGA20:
/* D161 and D163 are the VG20 server SKUs */
- if (strnstr(atom_ctx->vbios_pn, "D161",
- sizeof(atom_ctx->vbios_pn)) ||
- strnstr(atom_ctx->vbios_pn, "D163",
- sizeof(atom_ctx->vbios_pn))) {
+ if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161",
+ sizeof(atom_ctx->vbios_pn)) ||
+ strnstr(atom_ctx->vbios_pn, "D163",
+ sizeof(atom_ctx->vbios_pn)))) {
if (fru_addr)
*fru_addr = FRU_EEPROM_MADDR_6;
return true;
@@ -78,8 +79,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
return false;
}
case IP_VERSION(11, 0, 7):
- if (strnstr(atom_ctx->vbios_pn, "D603",
- sizeof(atom_ctx->vbios_pn))) {
+ if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603",
+ sizeof(atom_ctx->vbios_pn))) {
if (strnstr(atom_ctx->vbios_pn, "D603GLXE",
sizeof(atom_ctx->vbios_pn))) {
return false;
@@ -94,8 +95,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
}
case IP_VERSION(13, 0, 2):
/* All Aldebaran SKUs have an FRU */
- if (!strnstr(atom_ctx->vbios_pn, "D673",
- sizeof(atom_ctx->vbios_pn)))
+ if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673",
+ sizeof(atom_ctx->vbios_pn)))
if (fru_addr)
*fru_addr = FRU_EEPROM_MADDR_6;
return true;
@@ -104,6 +105,10 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
if (fru_addr)
*fru_addr = FRU_EEPROM_MADDR_8;
return true;
+ case IP_VERSION(13, 0, 12):
+ if (fru_addr)
+ *fru_addr = FRU_EEPROM_MADDR_INV;
+ return true;
default:
return false;
}
@@ -120,6 +125,10 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
if (!is_fru_eeprom_supported(adev, &fru_addr))
return 0;
+ /* FRU data avaialble, but no direct EEPROM access */
+ if (fru_addr == FRU_EEPROM_MADDR_INV)
+ return 0;
+
if (!adev->fru_info) {
adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL);
if (!adev->fru_info)
@@ -384,7 +393,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev)
void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev)
{
- if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info)
+ if (!adev->fru_info)
return;
sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
index bc58dca18035..98f3196599ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h
@@ -32,7 +32,7 @@ struct amdgpu_fru_info {
char product_name[AMDGPU_PRODUCT_NAME_LEN];
char serial[20];
char manufacturer_name[32];
- char fru_id[32];
+ char fru_id[50];
};
int amdgpu_fru_get_product_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
index 2d4b67175b55..328a1b963548 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c
@@ -122,6 +122,10 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 2) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(14, 0, 3))
+ return 0;
+
if (adev->asic_type >= CHIP_SIENNA_CICHLID)
return 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 256b95232de5..b2033f8352f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -78,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
if (adev->dummy_page_addr)
return 0;
- adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
adev->dummy_page_addr = 0;
@@ -99,8 +100,9 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
if (!adev->dummy_page_addr)
return;
- dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
adev->dummy_page_addr = 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1a5df8b94661..0ecc88df7208 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,12 +36,122 @@
#include <drm/drm_exec.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_hmm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_vm.h"
+
+static int
+amdgpu_gem_add_input_fence(struct drm_file *filp,
+ uint64_t syncobj_handles_array,
+ uint32_t num_syncobj_handles)
+{
+ struct dma_fence *fence;
+ uint32_t *syncobj_handles;
+ int ret, i;
+
+ if (!num_syncobj_handles)
+ return 0;
+
+ syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array),
+ size_mul(sizeof(uint32_t), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ for (i = 0; i < num_syncobj_handles; i++) {
+
+ if (!syncobj_handles[i]) {
+ ret = -EINVAL;
+ goto free_memdup;
+ }
+
+ ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
+ if (ret)
+ goto free_memdup;
+
+ dma_fence_wait(fence, false);
+
+ /* TODO: optimize async handling */
+ dma_fence_put(fence);
+ }
+
+free_memdup:
+ kfree(syncobj_handles);
+ return ret;
+}
+
+static int
+amdgpu_gem_update_timeline_node(struct drm_file *filp,
+ uint32_t syncobj_handle,
+ uint64_t point,
+ struct drm_syncobj **syncobj,
+ struct dma_fence_chain **chain)
+{
+ if (!syncobj_handle)
+ return 0;
+
+ /* Find the sync object */
+ *syncobj = drm_syncobj_find(filp, syncobj_handle);
+ if (!*syncobj)
+ return -ENOENT;
+
+ if (!point)
+ return 0;
+
+ /* Allocate the chain node */
+ *chain = dma_fence_chain_alloc();
+ if (!*chain) {
+ drm_syncobj_put(*syncobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+amdgpu_gem_update_bo_mapping(struct drm_file *filp,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation,
+ uint64_t point,
+ struct dma_fence *fence,
+ struct drm_syncobj *syncobj,
+ struct dma_fence_chain *chain)
+{
+ struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct dma_fence *last_update;
+
+ if (!syncobj)
+ return;
+
+ /* Find the last update fence */
+ switch (operation) {
+ case AMDGPU_VA_OP_MAP:
+ case AMDGPU_VA_OP_REPLACE:
+ if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
+ last_update = vm->last_update;
+ else
+ last_update = bo_va->last_pt_update;
+ break;
+ case AMDGPU_VA_OP_UNMAP:
+ case AMDGPU_VA_OP_CLEAR:
+ last_update = fence;
+ break;
+ default:
+ return;
+ }
+
+ /* Add fence to timeline */
+ if (!point)
+ drm_syncobj_replace_fence(syncobj, last_update);
+ else
+ drm_syncobj_add_point(syncobj, chain, last_update, point);
+}
static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf)
{
@@ -87,10 +197,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
- if (aobj) {
- amdgpu_hmm_unregister(aobj);
- ttm_bo_put(&aobj->tbo);
- }
+ amdgpu_hmm_unregister(aobj);
+ ttm_bo_put(&aobj->tbo);
}
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
@@ -179,11 +287,21 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
if (r)
return r;
+ amdgpu_vm_bo_update_shared(abo);
bo_va = amdgpu_vm_bo_find(vm, abo);
if (!bo_va)
bo_va = amdgpu_vm_bo_add(adev, vm, abo);
else
++bo_va->ref_count;
+
+ /* attach gfx eviction fence */
+ r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo);
+ if (r) {
+ DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
+ amdgpu_bo_unreserve(abo);
+ return r;
+ }
+
amdgpu_bo_unreserve(abo);
/* Validate and add eviction fence to DMABuf imports with dynamic
@@ -247,11 +365,15 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
goto out_unlock;
}
+ if (!amdgpu_vm_is_bo_always_valid(vm, bo))
+ amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
+
bo_va = amdgpu_vm_bo_find(vm, bo);
if (!bo_va || --bo_va->ref_count)
goto out_unlock;
amdgpu_vm_bo_del(adev, bo_va);
+ amdgpu_vm_bo_update_shared(bo);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
@@ -320,10 +442,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
uint32_t handle, initial_domain;
int r;
- /* reject DOORBELLs until userspace code to use it is available */
- if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL)
- return -EINVAL;
-
/* reject invalid gem flags */
if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
@@ -637,18 +755,23 @@ out:
*
* Update the bo_va directly after setting its address. Errors are not
* vital here, so they are not reported back to userspace.
+ *
+ * Returns resulting fence if freed BO(s) got cleared from the PT.
+ * otherwise stub fence in case of error.
*/
-static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va *bo_va,
- uint32_t operation)
+static struct dma_fence *
+amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va *bo_va,
+ uint32_t operation)
{
+ struct dma_fence *fence = dma_fence_get_stub();
int r;
if (!amdgpu_vm_ready(vm))
- return;
+ return fence;
- r = amdgpu_vm_clear_freed(adev, vm, NULL);
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (r)
goto error;
@@ -664,6 +787,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error:
if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+
+ return fence;
}
/**
@@ -712,6 +837,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *abo;
struct amdgpu_bo_va *bo_va;
+ struct drm_syncobj *timeline_syncobj = NULL;
+ struct dma_fence_chain *timeline_chain = NULL;
+ struct dma_fence *fence;
struct drm_exec exec;
uint64_t va_flags;
uint64_t vm_size;
@@ -773,6 +901,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
abo = NULL;
}
+ r = amdgpu_gem_add_input_fence(filp,
+ args->input_fence_syncobj_handles,
+ args->num_syncobj_handles);
+ if (r)
+ goto error_put_gobj;
+
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
@@ -801,6 +935,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
+ r = amdgpu_gem_update_timeline_node(filp,
+ args->vm_timeline_syncobj_out,
+ args->vm_timeline_point,
+ &timeline_syncobj,
+ &timeline_chain);
+ if (r)
+ goto error;
+
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -826,12 +968,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
- args->operation);
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
+ fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
+ args->operation);
+
+ if (timeline_syncobj)
+ amdgpu_gem_update_bo_mapping(filp, bo_va,
+ args->operation,
+ args->vm_timeline_point,
+ fence, timeline_syncobj,
+ timeline_chain);
+ else
+ dma_fence_put(fence);
+
+ }
error:
drm_exec_fini(&exec);
+error_put_gobj:
drm_gem_object_put(gobj);
return r;
}
@@ -839,7 +993,6 @@ error:
int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
- struct amdgpu_device *adev = drm_to_adev(dev);
struct drm_amdgpu_gem_op *args = data;
struct drm_gem_object *gobj;
struct amdgpu_vm_bo_base *base;
@@ -899,7 +1052,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- amdgpu_vm_bo_invalidate(adev, robj, true);
+ amdgpu_vm_bo_invalidate(robj, true);
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index f1ffab5a1eae..c5646af055ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -33,6 +33,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"
+#include "nvd.h"
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
@@ -74,29 +75,20 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
- int me, int pipe, int queue)
+static int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
{
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
int bit = 0;
bit += me * adev->gfx.me.num_pipe_per_me
- * adev->gfx.me.num_queue_per_pipe;
- bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ * num_queue_per_pipe;
+ bit += pipe * num_queue_per_pipe;
bit += queue;
return bit;
}
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue)
-{
- *queue = bit % adev->gfx.me.num_queue_per_pipe;
- *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
- % adev->gfx.me.num_pipe_per_me;
- *me = (bit / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
-}
-
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
@@ -248,8 +240,8 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe;
bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
- int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
- adev->gfx.me.num_queue_per_pipe;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me * num_queue_per_pipe;
if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
@@ -257,9 +249,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
for (i = 0; i < max_queues_per_me; i++) {
pipe = i % adev->gfx.me.num_pipe_per_me;
queue = (i / adev->gfx.me.num_pipe_per_me) %
- adev->gfx.me.num_queue_per_pipe;
+ num_queue_per_pipe;
- set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
+ set_bit(pipe * num_queue_per_pipe + queue,
adev->gfx.me.queue_bitmap);
}
} else {
@@ -268,8 +260,9 @@ void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
}
/* update the number of active graphics rings */
- adev->gfx.num_gfx_rings =
- bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+ if (adev->gfx.num_gfx_rings)
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
@@ -415,7 +408,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
}
/* prepare MQD backup */
- kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
+ kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
if (!kiq->mqd_backup) {
dev_warn(adev->dev,
"no memory to create MQD backup for ring %s\n", ring->name);
@@ -438,7 +431,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
+ adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
return -ENOMEM;
@@ -462,7 +455,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
ring->mqd_size = mqd_size;
/* prepare MQD backup */
- adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
+ adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.mec.mqd_backup[j]) {
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
return -ENOMEM;
@@ -525,6 +518,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings)) {
@@ -538,20 +534,15 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.compute_ring[j],
RESET_QUEUES, 0, 0);
}
-
- /**
- * This is workaround: only skip kiq_ring test
- * during ras recovery in suspend stage for gfx9.4.3
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
*/
- if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
- amdgpu_ras_in_recovery(adev)) {
- spin_unlock(&kiq->ring_lock);
- return 0;
- }
+ r = amdgpu_ring_test_helper(kiq_ring);
- if (kiq_ring->sched.ready && !adev->job_hang)
- r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
return r;
@@ -579,8 +570,11 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
- spin_lock(&kiq->ring_lock);
+ if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
+ return 0;
+
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
+ spin_lock(&kiq->ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_gfx_rings)) {
spin_unlock(&kiq->ring_lock);
@@ -593,11 +587,17 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j],
PREEMPT_QUEUES, 0, 0);
}
- }
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
- if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang)
+ /*
+ * Ring test will do a basic scratch register change check.
+ * Just run this to ensure that unmap queues that is submitted
+ * before got processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
- spin_unlock(&kiq->ring_lock);
+ spin_unlock(&kiq->ring_lock);
+ }
return r;
}
@@ -702,7 +702,13 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
kiq->pmf->kiq_map_queues(kiq_ring,
&adev->gfx.compute_ring[j]);
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
@@ -753,7 +759,13 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
&adev->gfx.gfx_ring[j]);
}
}
-
+ /* Submit map queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that map queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
@@ -762,18 +774,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
return r;
}
-/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
- *
- * @adev: amdgpu_device pointer
- * @bool enable true: enable gfx off feature, false: disable gfx off feature
- *
- * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
- * 2. other client can send request to disable gfx off feature, the request should be honored.
- * 3. other client can cancel their request of disable gfx off feature
- * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
- */
-
-void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
+ bool no_delay)
{
unsigned long delay = GFX_OFF_DELAY_ENABLE;
@@ -795,9 +797,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
/* If going to s2idle, no need to wait */
- if (adev->in_s0ix) {
+ if (no_delay) {
if (!amdgpu_dpm_set_powergating_by_smu(adev,
- AMD_IP_BLOCK_TYPE_GFX, true))
+ AMD_IP_BLOCK_TYPE_GFX, true, 0))
adev->gfx.gfx_off_state = true;
} else {
schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
@@ -809,7 +811,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
if (adev->gfx.gfx_off_state &&
- !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
adev->gfx.gfx_off_state = false;
if (adev->gfx.funcs->init_spm_golden) {
@@ -827,6 +829,43 @@ unlock:
mutex_unlock(&adev->gfx.gfx_off_mutex);
}
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms.
+ */
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ /* If going to s2idle, no need to wait */
+ bool no_delay = adev->in_s0ix ? true : false;
+
+ amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
+}
+
+/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @bool enable true: enable gfx off feature, false: disable gfx off feature
+ *
+ * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled.
+ * 2. other client can send request to disable gfx off feature, the request should be honored.
+ * 3. other client can cancel their request of disable gfx off feature
+ * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
+ *
+ * gfx off allow will be issued immediately.
+ */
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_do_off_ctrl(adev, enable, true);
+}
+
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
int r = 0;
@@ -895,6 +934,9 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
if (r)
return r;
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
if (adev->gfx.cp_ecc_error_irq.funcs) {
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
@@ -1312,6 +1354,10 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
struct amdgpu_device *adev = drm_to_adev(ddev);
int mode;
+ /* Only minimal precaution taken to reject requests while in reset.*/
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
AMDGPU_XCP_FL_NONE);
@@ -1355,43 +1401,49 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
return -EINVAL;
}
+ /* Don't allow a switch while under reset */
+ if (!down_read_trylock(&adev->reset_domain->sem))
+ return -EPERM;
+
ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
+ up_read(&adev->reset_domain->sem);
+
if (ret)
return ret;
return count;
}
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
struct device_attribute *addr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- char *supported_partition;
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
- /* TBD */
- switch (NUM_XCC(adev->gfx.xcc_mask)) {
- case 8:
- supported_partition = "SPX, DPX, QPX, CPX";
- break;
- case 6:
- supported_partition = "SPX, TPX, CPX";
- break;
- case 4:
- supported_partition = "SPX, DPX, CPX";
- break;
- /* this seems only existing in emulation phase */
- case 2:
- supported_partition = "SPX, CPX";
- break;
- default:
- supported_partition = "Not supported";
- break;
+ if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
}
- return sysfs_emit(buf, "%s\n", supported_partition);
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
}
static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
@@ -1399,9 +1451,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
struct drm_gpu_scheduler *sched = &ring->sched;
struct drm_sched_entity entity;
+ static atomic_t counter;
struct dma_fence *f;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
+ void *owner;
int i, r;
/* Initialize the scheduler entity */
@@ -1412,13 +1466,21 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
goto err;
}
- r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
- 64, 0,
- &job);
+ /*
+ * Use some unique dummy value as the owner to make sure we execute
+ * the cleaner shader on each submission. The value just need to change
+ * for each submission and is otherwise meaningless.
+ */
+ owner = (void *)(unsigned long)atomic_inc_return(&counter);
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
+ 64, 0, &job);
if (r)
goto err;
job->enforce_isolation = true;
+ /* always run the cleaner shader */
+ job->run_cleaner_shader = true;
ib = &job->ibs[0];
for (i = 0; i <= ring->funcs->align_mask; ++i)
@@ -1472,6 +1534,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
return 0;
}
+/**
+ * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * Provides the sysfs interface to manually run a cleaner shader, which is
+ * used to clear the GPU state between different tasks. Writing a value to the
+ * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
+ * The value written corresponds to the partition index on multi-partition
+ * devices. On single-partition devices, the value should be '0'.
+ *
+ * The cleaner shader clears the Local Data Store (LDS) and General Purpose
+ * Registers (GPRs) to ensure data isolation between GPU workloads.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
struct device_attribute *attr,
const char *buf,
@@ -1487,6 +1567,9 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
if (adev->in_suspend && !adev->in_runpm)
return -EPERM;
+ if (adev->gfx.disable_kq)
+ return -EPERM;
+
ret = kstrtol(buf, 0, &value);
if (ret)
@@ -1520,6 +1603,20 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
return count;
}
+/**
+ * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer to store the output data
+ *
+ * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
+ * feature for each GPU partition. Reading from the 'enforce_isolation'
+ * sysfs file returns the isolation settings for all partitions, where '0'
+ * indicates disabled, '1' indicates enabled, and '2' indicates enabled in legacy mode,
+ * and '3' indicates enabled without cleaner shader.
+ *
+ * Return: The number of bytes read from the sysfs file.
+ */
static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1543,6 +1640,23 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
return size;
}
+/**
+ * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
+ * @dev: The device structure
+ * @attr: The device attribute structure
+ * @buf: The buffer containing the input data
+ * @count: The size of the input data
+ *
+ * This function allows control over the 'enforce_isolation' feature, which
+ * serializes access to the graphics engine. Writing '0' to disable, '1' to
+ * enable isolation with cleaner shader, '2' to enable legacy isolation without
+ * cleaner shader, or '3' to enable process isolation without submitting the
+ * cleaner shader to the 'enforce_isolation' sysfs file sets the isolation mode
+ * for each partition. The input should specify the setting for all
+ * partitions.
+ *
+ * Return: The number of bytes written to the sysfs file.
+ */
static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
@@ -1576,28 +1690,67 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
return -EINVAL;
for (i = 0; i < num_partitions; i++) {
- if (partition_values[i] != 0 && partition_values[i] != 1)
+ if (partition_values[i] != 0 &&
+ partition_values[i] != 1 &&
+ partition_values[i] != 2 &&
+ partition_values[i] != 3)
return -EINVAL;
}
mutex_lock(&adev->enforce_isolation_mutex);
-
for (i = 0; i < num_partitions; i++) {
- if (adev->enforce_isolation[i] && !partition_values[i]) {
- /* Going from enabled to disabled */
- amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
- } else if (!adev->enforce_isolation[i] && partition_values[i]) {
- /* Going from disabled to enabled */
- amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+ switch (partition_values[i]) {
+ case 0:
+ default:
+ adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
+ break;
+ case 1:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE;
+ break;
+ case 2:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
+ break;
+ case 3:
+ adev->enforce_isolation[i] =
+ AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
+ break;
}
- adev->enforce_isolation[i] = partition_values[i];
}
-
mutex_unlock(&adev->enforce_isolation_mutex);
+ amdgpu_mes_update_enforce_isolation(adev);
+
return count;
}
+static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
+}
+
+static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
+}
+
static DEVICE_ATTR(run_cleaner_shader, 0200,
NULL, amdgpu_gfx_set_run_cleaner_shader);
@@ -1611,45 +1764,138 @@ static DEVICE_ATTR(current_compute_partition, 0644,
static DEVICE_ATTR(available_compute_partition, 0444,
amdgpu_gfx_get_available_compute_partition, NULL);
+static DEVICE_ATTR(gfx_reset_mask, 0444,
+ amdgpu_gfx_get_gfx_reset_mask, NULL);
-int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+static DEVICE_ATTR(compute_reset_mask, 0444,
+ amdgpu_gfx_get_compute_reset_mask, NULL);
+
+static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
int r;
+ if (!xcp_mgr)
+ return 0;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
+
+ if (!xcp_switch_supported)
+ dev_attr_current_compute_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+
r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
if (r)
return r;
- r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
+ if (xcp_switch_supported)
+ r = device_create_file(adev->dev,
+ &dev_attr_available_compute_partition);
return r;
}
-void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
{
+ struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
+ bool xcp_switch_supported;
+
+ if (!xcp_mgr)
+ return;
+
+ xcp_switch_supported =
+ (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
device_remove_file(adev->dev, &dev_attr_current_compute_partition);
- device_remove_file(adev->dev, &dev_attr_available_compute_partition);
+
+ if (xcp_switch_supported)
+ device_remove_file(adev->dev,
+ &dev_attr_available_compute_partition);
}
-int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
{
int r;
r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
if (r)
return r;
+ if (adev->gfx.enable_cleaner_shader)
+ r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
- r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
- if (r)
+ return r;
+}
+
+static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+ if (adev->gfx.enable_cleaner_shader)
+ device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+}
+
+static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
return r;
- return 0;
+ if (adev->gfx.num_gfx_rings) {
+ r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
+ if (r)
+ return r;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
}
-void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
{
- device_remove_file(adev->dev, &dev_attr_enforce_isolation);
- device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
+
+ if (adev->gfx.num_compute_rings)
+ device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
+}
+
+int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_gfx_sysfs_xcp_init(adev);
+ if (r) {
+ dev_err(adev->dev, "failed to create xcp sysfs files");
+ return r;
+ }
+
+ r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create isolation sysfs files");
+
+ r = amdgpu_gfx_sysfs_reset_mask_init(adev);
+ if (r)
+ dev_err(adev->dev, "failed to create reset mask sysfs files");
+
+ return r;
+}
+
+void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ amdgpu_gfx_sysfs_xcp_fini(adev);
+ amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_reset_mask_fini(adev);
+ }
}
int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
@@ -1720,39 +1966,41 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
bool enable)
{
- mutex_lock(&adev->gfx.kfd_sch_mutex);
+ mutex_lock(&adev->gfx.userq_sch_mutex);
if (enable) {
/* If the count is already 0, it means there's an imbalance bug somewhere.
* Note that the bug may be in a different caller than the one which triggers the
* WARN_ON_ONCE.
*/
- if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
+ if (WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx] == 0)) {
dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
goto unlock;
}
- adev->gfx.kfd_sch_req_count[idx]--;
+ adev->gfx.userq_sch_req_count[idx]--;
- if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
- adev->gfx.kfd_sch_inactive[idx]) {
+ if (adev->gfx.userq_sch_req_count[idx] == 0 &&
+ adev->gfx.userq_sch_inactive[idx]) {
schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
- GFX_SLICE_PERIOD);
+ msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
}
} else {
- if (adev->gfx.kfd_sch_req_count[idx] == 0) {
+ if (adev->gfx.userq_sch_req_count[idx] == 0) {
cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
- if (!adev->gfx.kfd_sch_inactive[idx]) {
- amdgpu_amdkfd_stop_sched(adev, idx);
- adev->gfx.kfd_sch_inactive[idx] = true;
+ if (!adev->gfx.userq_sch_inactive[idx]) {
+ amdgpu_userq_stop_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_stop_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = true;
}
}
- adev->gfx.kfd_sch_req_count[idx]++;
+ adev->gfx.userq_sch_req_count[idx]++;
}
unlock:
- mutex_unlock(&adev->gfx.kfd_sch_mutex);
+ mutex_unlock(&adev->gfx.userq_sch_mutex);
}
/**
@@ -1792,24 +2040,92 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
}
if (fences) {
+ /* we've already had our timeslice, so let's wrap this up */
schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
- GFX_SLICE_PERIOD);
+ msecs_to_jiffies(1));
} else {
/* Tell KFD to resume the runqueue */
- if (adev->kfd.init_complete) {
- WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
- WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
- amdgpu_amdkfd_start_sched(adev, idx);
- adev->gfx.kfd_sch_inactive[idx] = false;
+ WARN_ON_ONCE(!adev->gfx.userq_sch_inactive[idx]);
+ WARN_ON_ONCE(adev->gfx.userq_sch_req_count[idx]);
+
+ amdgpu_userq_start_sched_for_enforce_isolation(adev, idx);
+ if (adev->kfd.init_complete)
+ amdgpu_amdkfd_start_sched(adev, idx);
+ adev->gfx.userq_sch_inactive[idx] = false;
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+}
+
+/**
+ * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
+ * @adev: amdgpu_device pointer
+ * @idx: Index of the GPU partition
+ *
+ * When kernel submissions come in, the jobs are given a time slice and once
+ * that time slice is up, if there are KFD user queues active, kernel
+ * submissions are blocked until KFD has had its time slice. Once the KFD time
+ * slice is up, KFD user queues are preempted and kernel submissions are
+ * unblocked and allowed to run again.
+ */
+static void
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
+ u32 idx)
+{
+ unsigned long cjiffies;
+ bool wait = false;
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
+ /* set the initial values if nothing is set */
+ if (!adev->gfx.enforce_isolation_jiffies[idx]) {
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ }
+ /* Make sure KFD gets a chance to run */
+ if (amdgpu_amdkfd_compute_active(adev, idx)) {
+ cjiffies = jiffies;
+ if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
+ cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
+ if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) {
+ /* if our time is up, let KGD work drain before scheduling more */
+ wait = true;
+ /* reset the timer period */
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
+ } else {
+ /* set the timer period to what's left in our time slice */
+ adev->gfx.enforce_isolation_time[idx] =
+ GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
+ }
+ } else {
+ /* if jiffies wrap around we will just wait a little longer */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ }
+ } else {
+ /* if there is no KFD work, then set the full slice period */
+ adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
+ adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
}
}
mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (wait)
+ msleep(GFX_SLICE_PERIOD_MS);
}
+/**
+ * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring begin_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 idx;
+ bool sched_work = false;
if (!adev->gfx.enable_cleaner_shader)
return;
@@ -1822,18 +2138,34 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
if (idx >= MAX_XCP)
return;
+ /* Don't submit more work until KFD has had some time */
+ amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
+
mutex_lock(&adev->enforce_isolation_mutex);
- if (adev->enforce_isolation[idx]) {
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
if (adev->kfd.init_complete)
- amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
+ sched_work = true;
}
mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
}
+/**
+ * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
+ * @ring: Pointer to the amdgpu_ring structure
+ *
+ * Ring end_use helper implementation for gfx which serializes access to the
+ * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
+ * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
+ * each get a time slice when both are active.
+ */
void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 idx;
+ bool sched_work = false;
if (!adev->gfx.enable_cleaner_shader)
return;
@@ -1847,9 +2179,303 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
return;
mutex_lock(&adev->enforce_isolation_mutex);
- if (adev->enforce_isolation[idx]) {
+ if (adev->enforce_isolation[idx] == AMDGPU_ENFORCE_ISOLATION_ENABLE) {
if (adev->kfd.init_complete)
- amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+ sched_work = true;
}
mutex_unlock(&adev->enforce_isolation_mutex);
+
+ if (sched_work)
+ amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
+}
+
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.idle_work.work);
+ enum PP_SMC_POWER_PROFILE profile;
+ u32 i, fences = 0;
+ int r;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
+ fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
+ if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+ } else {
+ schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+ }
+}
+
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ enum PP_SMC_POWER_PROFILE profile;
+ int r;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ if (adev->gfx.num_gfx_rings)
+ profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+ else
+ profile = PP_SMC_POWER_PROFILE_COMPUTE;
+
+ atomic_inc(&adev->gfx.total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->gfx.workload_profile_active)
+ return;
+
+ mutex_lock(&adev->gfx.workload_profile_mutex);
+ if (!adev->gfx.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, profile, true);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+ profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+ "fullscreen 3D" : "compute");
+ adev->gfx.workload_profile_active = true;
+ }
+ mutex_unlock(&adev->gfx.workload_profile_mutex);
+}
+
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (amdgpu_dpm_is_overdrive_enabled(adev))
+ return;
+
+ atomic_dec(&ring->adev->gfx.total_submission_cnt);
+
+ schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_start - Set CSB preamble start
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble setup.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer)
+{
+ u32 count = 0;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_data_parser - Parser CS data
+ *
+ * @adev: amdgpu_device pointer used to get the CS data and other gfx info.
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index to start set the preemble end.
+ *
+ * Return:
+ * return the latest index.
+ */
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count)
+{
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ u32 i;
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
+
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ }
+ }
+ }
+
+ return count;
+}
+
+/**
+ * amdgpu_gfx_csb_preamble_end - Set CSB preamble end
+ *
+ * @buffer: This is an output variable that gets the PACKET3 preamble end.
+ * @count: Index to start set the preemble end.
+ */
+void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count)
+{
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+/*
+ * debugfs for to enable/disable gfx job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
+ amdgpu_debugfs_gfx_sched_mask_get,
+ amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_gfx_rings > 1))
+ return;
+ sprintf(name, "amdgpu_gfx_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_gfx_sched_mask_fops);
+#endif
+}
+
+/*
+ * debugfs for to enable/disable compute job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->gfx.num_compute_rings) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (val & (1 << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
+ amdgpu_debugfs_compute_sched_mask_get,
+ amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->gfx.num_compute_rings > 1))
+ return;
+ sprintf(name, "amdgpu_compute_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_compute_sched_mask_fops);
+#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 5644e10a86a9..08f268dab8f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -57,6 +57,9 @@ enum amdgpu_gfx_pipe_priority {
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
+/* 1 second timeout */
+#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
+
enum amdgpu_gfx_partition {
AMDGPU_SPX_PARTITION_MODE = 0,
AMDGPU_DPX_PARTITION_MODE = 1,
@@ -167,10 +170,46 @@ struct amdgpu_kiq {
#define AMDGPU_GFX_MAX_SE 4
#define AMDGPU_GFX_MAX_SH_PER_SE 2
+/**
+ * amdgpu_rb_config - Configure a single Render Backend (RB)
+ *
+ * Bad RBs are fused off and there is a harvest register the driver reads to
+ * determine which RB(s) are fused off so that the driver can configure the
+ * hardware state so that nothing gets sent to them. There are also user
+ * harvest registers that the driver can program to disable additional RBs,
+ * etc., for testing purposes.
+ */
struct amdgpu_rb_config {
+ /**
+ * @rb_backend_disable:
+ *
+ * The value captured from register RB_BACKEND_DISABLE indicates if the
+ * RB backend is disabled or not.
+ */
uint32_t rb_backend_disable;
+
+ /**
+ * @user_rb_backend_disable:
+ *
+ * The value captured from register USER_RB_BACKEND_DISABLE indicates
+ * if the User RB backend is disabled or not.
+ */
uint32_t user_rb_backend_disable;
+
+ /**
+ * @raster_config:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the first register.
+ */
uint32_t raster_config;
+
+ /**
+ * @raster_config_1:
+ *
+ * To set up all of the states, it is necessary to have two registers
+ * to keep all of the states. This field holds the second register.
+ */
uint32_t raster_config_1;
};
@@ -218,6 +257,13 @@ struct amdgpu_gfx_config {
uint32_t macrotile_mode_array[16];
struct gb_addr_config gb_addr_config_fields;
+
+ /**
+ * @rb_config:
+ *
+ * Matrix that keeps all the Render Backend (color and depth buffer
+ * handling) configuration on the 3D engine.
+ */
struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
/* gfx configure feature */
@@ -302,7 +348,8 @@ struct amdgpu_gfx_funcs {
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
- struct amdgpu_gfx_shadow_info *shadow_info);
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check);
enum amdgpu_gfx_partition
(*query_partition_mode)(struct amdgpu_device *adev);
int (*switch_partition_mode)(struct amdgpu_device *adev,
@@ -424,6 +471,8 @@ struct amdgpu_gfx {
/* reset mask */
uint32_t grbm_soft_reset;
uint32_t srbm_soft_reset;
+ uint32_t gfx_supported_reset;
+ uint32_t compute_supported_reset;
/* gfx off */
bool gfx_off_state; /* true: enabled, false: disabled */
@@ -469,9 +518,19 @@ struct amdgpu_gfx {
bool enable_cleaner_shader;
struct amdgpu_isolation_work enforce_isolation[MAX_XCP];
/* Mutex for synchronizing KFD scheduler operations */
- struct mutex kfd_sch_mutex;
- u64 kfd_sch_req_count[MAX_XCP];
- bool kfd_sch_inactive[MAX_XCP];
+ struct mutex userq_sch_mutex;
+ u64 userq_sch_req_count[MAX_XCP];
+ bool userq_sch_inactive[MAX_XCP];
+ unsigned long enforce_isolation_jiffies[MAX_XCP];
+ unsigned long enforce_isolation_time[MAX_XCP];
+
+ atomic_t total_submission_cnt;
+ struct delayed_work idle_work;
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
+
+ bool disable_kq;
+ bool disable_uq;
};
struct amdgpu_gfx_ras_reg_entry {
@@ -491,7 +550,7 @@ struct amdgpu_gfx_ras_mem_id_entry {
#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid, xcc_id) ((adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid), (xcc_id)))
#define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
-#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si)))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) ((adev)->gfx.funcs->get_gfx_shadow_info((adev), (si), false))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -538,13 +597,10 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
-int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
- int pipe, int queue);
-void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
- int *me, int *pipe, int *queue);
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
+void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable);
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
@@ -579,12 +635,20 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
unsigned int cleaner_shader_size,
const void *cleaner_shader_ptr);
-int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev);
-void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work);
void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work);
+void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring);
+u32 amdgpu_gfx_csb_preamble_start(volatile u32 *buffer);
+u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, volatile u32 *buffer, u32 count);
+void amdgpu_gfx_csb_preamble_end(volatile u32 *buffer, u32 count);
+
+void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
+
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
{
switch (mode) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 17a19d49d30a..6b0fbbb91e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -269,7 +269,7 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc
* @mc: memory controller structure holding memory information
* @gart_placement: GART placement policy with respect to VRAM
*
- * Function will place try to place GART before or after VRAM.
+ * Function will try to place GART before or after VRAM.
* If GART size is bigger than space left then we ajust GART size.
* Thus function will never fails.
*/
@@ -573,6 +573,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
+ struct amdgpu_ring *shared_ring;
/* init the vm inv eng for all vmhubs */
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
@@ -591,7 +592,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
if (ring == &adev->mes.ring[0] ||
ring == &adev->mes.ring[1] ||
- ring == &adev->umsch_mm.ring)
+ ring == &adev->umsch_mm.ring ||
+ ring == &adev->cper.ring_buf)
+ continue;
+
+ /* Skip if the ring is a shared ring */
+ if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
@@ -606,6 +612,21 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->vm_hub);
+ /* SDMA has a special packet which allows it to use the same
+ * invalidation engine for all the rings in one instance.
+ * Therefore, we do not allocate a separate VM invalidation engine
+ * for SDMA page rings. Instead, they share the VM invalidation
+ * engine with the SDMA gfx ring. This change ensures efficient
+ * resource management and avoids the issue of insufficient VM
+ * invalidation engines.
+ */
+ shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+ if (shared_ring) {
+ shared_ring->vm_inv_eng = ring->vm_inv_eng;
+ dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+ ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+ continue;
+ }
}
return 0;
@@ -678,12 +699,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
uint32_t flush_type, bool all_hub,
uint32_t inst)
{
- u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
- adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
unsigned int ndw;
- int r;
+ int r, cnt = 0;
uint32_t seq;
/*
@@ -740,10 +759,21 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq[inst].ring_lock);
- if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
+ !amdgpu_reset_pending(adev->reset_domain)) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY) {
dev_err(adev->dev, "timeout waiting for kiq fence\n");
r = -ETIME;
- }
+ } else
+ r = 0;
}
error_unlock_reset:
@@ -851,6 +881,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
@@ -888,6 +919,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
gc_ver == IP_VERSION(9, 4, 2) ||
gc_ver == IP_VERSION(9, 4, 3) ||
gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0) ||
gc_ver >= IP_VERSION(10, 3, 0));
if (!amdgpu_sriov_xnack_support(adev))
@@ -1065,18 +1097,6 @@ uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
}
-/**
- * amdgpu_gmc_vram_cpu_pa - calculate vram buffer object's physical address
- * from CPU's view
- *
- * @adev: amdgpu_device pointer
- * @bo: amdgpu buffer object
- */
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
-{
- return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base;
-}
-
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
{
struct amdgpu_bo *vram_bo = NULL;
@@ -1130,6 +1150,79 @@ release_buffer:
return ret;
}
+static const char *nps_desc[] = {
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+static ssize_t available_memory_partition_show(struct device *dev,
+ struct device_attribute *addr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int size = 0, mode;
+ char *sep = "";
+
+ for_each_inst(mode, adev->gmc.supported_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t current_memory_partition_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ enum amdgpu_memory_partition mode;
+ struct amdgpu_hive_info *hive;
+ int i;
+
+ mode = UNKNOWN_MEMORY_PARTITION_MODE;
+ for_each_inst(i, adev->gmc.supported_nps_modes) {
+ if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
+ mode = i;
+ break;
+ }
+ }
+
+ if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
+ return -EINVAL;
+
+ if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
+ dev_info(
+ adev->dev,
+ "requested NPS mode is same as current NPS mode, skipping\n");
+ return count;
+ }
+
+ /* If device is part of hive, all devices in the hive should request the
+ * same mode. Hence store the requested mode in hive.
+ */
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ atomic_set(&hive->requested_nps_mode, mode);
+ amdgpu_put_xgmi_hive(hive);
+ } else {
+ adev->gmc.requested_nps_mode = mode;
+ }
+
+ dev_info(
+ adev->dev,
+ "NPS mode change requested, please remove and reload the driver\n");
+
+ return count;
+}
+
static ssize_t current_memory_partition_show(
struct device *dev, struct device_attribute *addr, char *buf)
{
@@ -1137,54 +1230,70 @@ static ssize_t current_memory_partition_show(
struct amdgpu_device *adev = drm_to_adev(ddev);
enum amdgpu_memory_partition mode;
+ /* Only minimal precaution taken to reject requests while in reset */
+ if (amdgpu_in_reset(adev))
+ return -EPERM;
+
mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
- switch (mode) {
- case AMDGPU_NPS1_PARTITION_MODE:
- return sysfs_emit(buf, "NPS1\n");
- case AMDGPU_NPS2_PARTITION_MODE:
- return sysfs_emit(buf, "NPS2\n");
- case AMDGPU_NPS3_PARTITION_MODE:
- return sysfs_emit(buf, "NPS3\n");
- case AMDGPU_NPS4_PARTITION_MODE:
- return sysfs_emit(buf, "NPS4\n");
- case AMDGPU_NPS6_PARTITION_MODE:
- return sysfs_emit(buf, "NPS6\n");
- case AMDGPU_NPS8_PARTITION_MODE:
- return sysfs_emit(buf, "NPS8\n");
- default:
+ if ((mode >= ARRAY_SIZE(nps_desc)) ||
+ (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
return sysfs_emit(buf, "UNKNOWN\n");
- }
+
+ return sysfs_emit(buf, "%s\n", nps_desc[mode]);
}
-static DEVICE_ATTR_RO(current_memory_partition);
+static DEVICE_ATTR_RW(current_memory_partition);
+static DEVICE_ATTR_RO(available_memory_partition);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
{
+ bool nps_switch_support;
+ int r = 0;
+
if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
return 0;
+ nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
+ AMDGPU_ALL_NPS_MASK) > 1);
+ if (!nps_switch_support)
+ dev_attr_current_memory_partition.attr.mode &=
+ ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ else
+ r = device_create_file(adev->dev,
+ &dev_attr_available_memory_partition);
+
+ if (r)
+ return r;
+
return device_create_file(adev->dev,
&dev_attr_current_memory_partition);
}
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
{
+ if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
+ return;
+
device_remove_file(adev->dev, &dev_attr_current_memory_partition);
+ device_remove_file(adev->dev, &dev_attr_available_memory_partition);
}
int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges,
- int exp_ranges)
+ uint8_t *exp_ranges)
{
struct amdgpu_gmc_memrange *ranges;
int range_cnt, ret, i, j;
uint32_t nps_type;
+ bool refresh;
- if (!mem_ranges)
+ if (!mem_ranges || !exp_ranges)
return -EINVAL;
+ refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
+ (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
- &range_cnt);
+ &range_cnt, refresh);
if (ret)
return ret;
@@ -1192,16 +1301,16 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
/* TODO: For now, expect ranges and partition count to be the same.
* Adjust if there are holes expected in any NPS domain.
*/
- if (range_cnt != exp_ranges) {
+ if (*exp_ranges && (range_cnt != *exp_ranges)) {
dev_warn(
adev->dev,
"NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
- exp_ranges, nps_type, range_cnt);
+ *exp_ranges, nps_type, range_cnt);
ret = -EINVAL;
goto err;
}
- for (i = 0; i < exp_ranges; ++i) {
+ for (i = 0; i < range_cnt; ++i) {
if (ranges[i].base_address >= ranges[i].limit_address) {
dev_warn(
adev->dev,
@@ -1242,8 +1351,81 @@ int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
ranges[i].limit_address - ranges[i].base_address + 1;
}
+ if (!*exp_ranges)
+ *exp_ranges = range_cnt;
err:
kfree(ranges);
return ret;
}
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode)
+{
+ /* Not supported on VF devices and APUs */
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return -EOPNOTSUPP;
+
+ if (!adev->psp.funcs) {
+ dev_err(adev->dev,
+ "PSP interface not available for nps mode change request");
+ return -EINVAL;
+ }
+
+ return psp_memory_partition(&adev->psp, nps_mode);
+}
+
+static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
+ int req_nps_mode,
+ int cur_nps_mode)
+{
+ return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
+ BIT(req_nps_mode)) &&
+ req_nps_mode != cur_nps_mode);
+}
+
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
+{
+ int req_nps_mode, cur_nps_mode, r;
+ struct amdgpu_hive_info *hive;
+
+ if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
+ !adev->gmc.gmc_funcs->request_mem_partition_mode)
+ return;
+
+ cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (hive) {
+ req_nps_mode = atomic_read(&hive->requested_nps_mode);
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
+ cur_nps_mode)) {
+ amdgpu_put_xgmi_hive(hive);
+ return;
+ }
+ r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
+ amdgpu_put_xgmi_hive(hive);
+ goto out;
+ }
+
+ req_nps_mode = adev->gmc.requested_nps_mode;
+ if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
+ return;
+
+ /* even if this fails, we should let driver unload w/o blocking */
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
+out:
+ if (r)
+ dev_err(adev->dev, "NPS mode change request failed\n");
+ else
+ dev_info(
+ adev->dev,
+ "NPS mode change request done, reload driver to complete the change\n");
+}
+
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs->need_reset_on_init)
+ return adev->gmc.gmc_funcs->need_reset_on_init(adev);
+
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 4d951a1baefa..80fa29c26e9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include "amdgpu_irq.h"
+#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
/* VA hole for 48bit addresses on Vega10 */
@@ -61,6 +62,9 @@
*/
#define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL
+/* XNACK flags */
+#define AMDGPU_GMC_XNACK_FLAG_CHAIN BIT(0)
+
struct firmware;
enum amdgpu_memory_partition {
@@ -73,6 +77,13 @@ enum amdgpu_memory_partition {
AMDGPU_NPS8_PARTITION_MODE = 8,
};
+#define AMDGPU_ALL_NPS_MASK \
+ (BIT(AMDGPU_NPS1_PARTITION_MODE) | BIT(AMDGPU_NPS2_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS3_PARTITION_MODE) | BIT(AMDGPU_NPS4_PARTITION_MODE) | \
+ BIT(AMDGPU_NPS6_PARTITION_MODE) | BIT(AMDGPU_NPS8_PARTITION_MODE))
+
+#define AMDGPU_GMC_INIT_RESET_NPS BIT(0)
+
/*
* GMC page fault information
*/
@@ -161,29 +172,10 @@ struct amdgpu_gmc_funcs {
enum amdgpu_memory_partition (*query_mem_partition_mode)(
struct amdgpu_device *adev);
-};
-
-struct amdgpu_xgmi_ras {
- struct amdgpu_ras_block_object ras_block;
-};
-
-struct amdgpu_xgmi {
- /* from psp */
- u64 node_id;
- u64 hive_id;
- /* fixed per family */
- u64 node_segment_size;
- /* physical node (0-3) */
- unsigned physical_node_id;
- /* number of nodes (0-4) */
- unsigned num_physical_nodes;
- /* gpu list in the same hive */
- struct list_head head;
- bool supported;
- struct ras_common_if *ras_if;
- bool connected_to_cpu;
- bool pending_reset;
- struct amdgpu_xgmi_ras *ras;
+ /* Request NPS mode */
+ int (*request_mem_partition_mode)(struct amdgpu_device *adev,
+ int nps_mode);
+ bool (*need_reset_on_init)(struct amdgpu_device *adev);
};
struct amdgpu_mem_partition_info {
@@ -305,10 +297,14 @@ struct amdgpu_gmc {
struct amdgpu_mem_partition_info *mem_partitions;
uint8_t num_mem_partitions;
const struct amdgpu_gmc_funcs *gmc_funcs;
+ enum amdgpu_memory_partition requested_nps_mode;
+ uint32_t supported_nps_modes;
+ uint32_t reset_flags;
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
int noretry;
+ uint32_t xnack_flags;
uint32_t vmid0_page_table_block_size;
uint32_t vmid0_page_table_depth;
@@ -447,13 +443,17 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);
void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
-uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges,
- int exp_ranges);
+ uint8_t *exp_ranges);
+
+int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
+ int nps_mode);
+void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev);
+bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
index b6cf801939aa..6e02fb9ac2f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include <uapi/linux/kfd_ioctl.h>
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
{
@@ -46,3 +47,22 @@ int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev)
/* hdp ras follows amdgpu_ras_block_late_init_default for late init */
return 0;
}
+
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset +
+ KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >>
+ 2,
+ 0);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 7b8a6152dc8d..4cfd932b7e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -44,4 +44,6 @@ struct amdgpu_hdp {
};
int amdgpu_hdp_ras_sw_init(struct amdgpu_device *adev);
+void amdgpu_hdp_generic_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif /* __AMDGPU_HDP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index 00d6211e0fbf..8179d0814db9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -225,13 +225,23 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c)
kfree(i2c);
}
-/* Add the default buses */
void amdgpu_i2c_init(struct amdgpu_device *adev)
{
- if (amdgpu_hw_i2c)
- DRM_INFO("hw_i2c forced on, you may experience display detection problems!\n");
-
- amdgpu_atombios_i2c_init(adev);
+ if (!adev->is_atom_fw) {
+ if (!amdgpu_device_has_dc_support(adev)) {
+ amdgpu_atombios_i2c_init(adev);
+ } else {
+ switch (adev->asic_type) {
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ amdgpu_atombios_oem_i2c_init(adev, 0x97);
+ break;
+ default:
+ break;
+ }
+ }
+ }
}
/* remove all the buses */
@@ -247,22 +257,6 @@ void amdgpu_i2c_fini(struct amdgpu_device *adev)
}
}
-/* Add additional buses */
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name)
-{
- struct drm_device *dev = adev_to_drm(adev);
- int i;
-
- for (i = 0; i < AMDGPU_MAX_I2C_BUS; i++) {
- if (!adev->i2c_bus[i]) {
- adev->i2c_bus[i] = amdgpu_i2c_create(dev, rec, name);
- return;
- }
- }
-}
-
/* looks up bus based on id */
struct amdgpu_i2c_chan *
amdgpu_i2c_lookup(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
index 63c2ff7499e1..1d3d3806e0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h
@@ -30,9 +30,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev,
void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c);
void amdgpu_i2c_init(struct amdgpu_device *adev);
void amdgpu_i2c_fini(struct amdgpu_device *adev);
-void amdgpu_i2c_add(struct amdgpu_device *adev,
- const struct amdgpu_i2c_bus_rec *rec,
- const char *name);
struct amdgpu_i2c_chan *
amdgpu_i2c_lookup(struct amdgpu_device *adev,
const struct amdgpu_i2c_bus_rec *i2c_bus);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 8b512dc28df8..802743efa3b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
/**
* amdgpu_ib_free - free an IB (Indirect Buffer)
*
- * @adev: amdgpu_device pointer
* @ib: IB object to free
* @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f)
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
+ amdgpu_sa_bo_free(&ib->sa_bo, f);
}
/**
@@ -165,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
init_shadow = false;
}
- if (!ring->sched.ready && !ring->is_mes_queue) {
+ if (!ring->sched.ready) {
dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
return -EINVAL;
}
- if (vm && !job->vmid && !ring->is_mes_queue) {
+ if (vm && !job->vmid) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
@@ -193,8 +191,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
- (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
- amdgpu_vm_need_pipeline_sync(ring, job))) {
+ need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) {
+
need_pipe_sync = true;
if (tmp)
@@ -299,7 +297,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
amdgpu_ring_patch_cond_exec(ring, cond_exec);
ring->current_ctx = fence_ctx;
- if (vm && ring->funcs->emit_switch_buffer)
+ if (job && ring->funcs->emit_switch_buffer)
amdgpu_ring_emit_switch_buffer(ring);
if (ring->funcs->emit_wave_limit &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 92d27d32de41..5dd78a9cb12d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -209,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
return 0;
}
- fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
+ fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT);
if (!fences)
return -ENOMEM;
@@ -287,46 +287,34 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
(*id)->flushed_updates < updates ||
!(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
- !dma_fence_is_signaled((*id)->last_flush))) {
+ !dma_fence_is_signaled((*id)->last_flush)))
+ needs_flush = true;
+
+ if ((*id)->owner != vm->immediate.fence_context ||
+ (!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
- /* Wait for the gang to be assembled before using a
- * reserved VMID or otherwise the gang could deadlock.
+ /* Don't use per engine and per process VMID at the
+ * same time
*/
- tmp = amdgpu_device_get_gang(adev);
- if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
+ if (adev->vm_manager.concurrent_flush)
+ ring = NULL;
+
+ /* to prevent one context starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
*id = NULL;
- *fence = tmp;
+ *fence = dma_fence_get(tmp);
return 0;
}
- dma_fence_put(tmp);
-
- /* Make sure the id is owned by the gang before proceeding */
- if (!job->gang_submit ||
- (*id)->owner != vm->immediate.fence_context) {
-
- /* Don't use per engine and per process VMID at the
- * same time
- */
- if (adev->vm_manager.concurrent_flush)
- ring = NULL;
-
- /* to prevent one context starved by another context */
- (*id)->pd_gpu_addr = 0;
- tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
- if (tmp) {
- *id = NULL;
- *fence = dma_fence_get(tmp);
- return 0;
- }
- }
- needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
return r;
@@ -342,15 +330,13 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
* @ring: ring we want to submit job to
* @job: job who wants to use the VMID
* @id: resulting VMID
- * @fence: fence to wait for if no id could be grabbed
*
* Try to reuse a VMID for this submission.
*/
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
struct amdgpu_ring *ring,
struct amdgpu_job *job,
- struct amdgpu_vmid **id,
- struct dma_fence **fence)
+ struct amdgpu_vmid **id)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->vm_hub;
@@ -387,7 +373,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active,
- &job->base.s_fence->finished);
+ &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
return r;
@@ -424,12 +411,12 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle)
goto error;
- if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) {
+ if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
} else {
- r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence);
+ r = amdgpu_vmid_grab_used(vm, ring, job, &id);
if (r)
goto error;
@@ -439,7 +426,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(&id->active,
- &job->base.s_fence->finished);
+ &job->base.s_fence->finished,
+ GFP_NOWAIT);
if (r)
goto error;
@@ -476,19 +464,14 @@ error:
/*
* amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
- * @adev: amdgpu_device pointer
* @vm: the VM to check
* @vmhub: the VMHUB which will be used
*
* Returns: True if the VM will use a reserved VMID.
*/
-bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, unsigned int vmhub)
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
{
- return vm->reserved_vmid[vmhub] ||
- (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ?
- vm->root.bo->xcp_id : 0] &&
- AMDGPU_IS_GFXHUB(vmhub));
+ return vm->reserved_vmid[vmhub];
}
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
@@ -593,8 +576,16 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
INIT_LIST_HEAD(&id_mgr->ids_lru);
id_mgr->reserved_use_count = 0;
- /* manage only VMIDs not used by KFD */
- id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ /* for GC <10, SDMA uses MMHUB so use first_kfd_vmid for both GC and MM */
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0))
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
+ else if (AMDGPU_IS_MMHUB0(i) ||
+ AMDGPU_IS_MMHUB1(i))
+ id_mgr->num_ids = 16;
+ else
+ /* manage only VMIDs not used by KFD */
+ id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
@@ -605,7 +596,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
}
/* alloc a default reserved vmid to enforce isolation */
for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
- if (adev->enforce_isolation[i])
+ if (adev->enforce_isolation[i] != AMDGPU_ENFORCE_ISOLATION_DISABLE)
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 4012fb2dd08a..240fa6751260 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -78,8 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
-bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, unsigned int vmhub);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index f3b0aaf3ebc6..30f16968b578 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_ih.h"
+#include "amdgpu_reset.h"
/**
* amdgpu_ih_ring_init - initialize the IH state
@@ -227,13 +228,23 @@ restart_ih:
ih->rptr &= ih->ptr_mask;
}
- amdgpu_ih_set_rptr(adev, ih);
+ if (!ih->overflow)
+ amdgpu_ih_set_rptr(adev, ih);
+
wake_up_all(&ih->wait_process);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev, ih);
if (wptr != ih->rptr)
- goto restart_ih;
+ if (!ih->overflow)
+ goto restart_ih;
+
+ if (ih->overflow)
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
return IRQ_HANDLED;
}
@@ -298,3 +309,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
return dw1 | ((u64)(dw2 & 0xffff) << 32);
}
+
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
+{
+ return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" :
+ ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown";
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 508f02eb0cf8..7f7ea046e209 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -72,12 +72,16 @@ struct amdgpu_ih_ring {
/* For waiting on IH processing at checkpoint. */
wait_queue_head_t wait_process;
uint64_t processed_timestamp;
+ bool overflow;
};
/* return true if time stamp t2 is after t1 with 48bit wrap around */
#define amdgpu_ih_ts_after(t1, t2) \
(((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL)
+#define amdgpu_ih_ts_after_or_equal(t1, t2) \
+ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL)
+
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
@@ -110,4 +114,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
signed int offset);
+const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 19ce4da285e8..13c60cac4261 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -619,6 +619,10 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned int type)
{
+ /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */
+ if (amdgpu_ras_is_rma(adev))
+ return -EINVAL;
+
if (!adev->irq.installed)
return -ENOENT;
@@ -725,8 +729,8 @@ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
- adev->irq.domain = irq_domain_add_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
- &amdgpu_hw_irqdomain_ops, adev);
+ adev->irq.domain = irq_domain_create_linear(NULL, AMDGPU_MAX_IRQ_SRC_ID,
+ &amdgpu_hw_irqdomain_ops, adev);
if (!adev->irq.domain) {
DRM_ERROR("GPU irq add domain failed\n");
return -ENODEV;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
index 4766e99dd98f..43fc941dfa57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c
@@ -33,33 +33,17 @@
#include "isp_v4_1_0.h"
#include "isp_v4_1_1.h"
-static int isp_sw_init(void *handle)
-{
- return 0;
-}
-
-static int isp_sw_fini(void *handle)
-{
- return 0;
-}
-
/**
* isp_hw_init - start and test isp block
*
- * @handle: handle for amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int isp_hw_init(void *handle)
+static int isp_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_isp *isp = &adev->isp;
- const struct amdgpu_ip_block *ip_block =
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ISP);
-
- if (!ip_block)
- return -EINVAL;
-
if (isp->funcs->hw_init != NULL)
return isp->funcs->hw_init(isp);
@@ -69,13 +53,12 @@ static int isp_hw_init(void *handle)
/**
* isp_hw_fini - stop the hardware block
*
- * @handle: handle for amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int isp_hw_fini(void *handle)
+static int isp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amdgpu_isp *isp = &adev->isp;
+ struct amdgpu_isp *isp = &ip_block->adev->isp;
if (isp->funcs->hw_fini != NULL)
return isp->funcs->hw_fini(isp);
@@ -83,16 +66,6 @@ static int isp_hw_fini(void *handle)
return -ENODEV;
}
-static int isp_suspend(void *handle)
-{
- return 0;
-}
-
-static int isp_resume(void *handle)
-{
- return 0;
-}
-
static int isp_load_fw_by_psp(struct amdgpu_device *adev)
{
const struct common_firmware_header *hdr;
@@ -104,7 +77,8 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev)
sizeof(ucode_prefix));
/* read isp fw */
- r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix);
+ r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s.bin", ucode_prefix);
if (r) {
amdgpu_ucode_release(&adev->isp.fw);
return r;
@@ -122,9 +96,10 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev)
return r;
}
-static int isp_early_init(void *handle)
+static int isp_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_isp *isp = &adev->isp;
switch (amdgpu_ip_version(adev, ISP_HWIP, 0)) {
@@ -149,28 +124,18 @@ static int isp_early_init(void *handle)
return 0;
}
-static bool isp_is_idle(void *handle)
+static bool isp_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int isp_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int isp_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int isp_set_clockgating_state(void *handle,
+static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int isp_set_powergating_state(void *handle,
+static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -179,16 +144,9 @@ static int isp_set_powergating_state(void *handle,
static const struct amd_ip_funcs isp_ip_funcs = {
.name = "isp_ip",
.early_init = isp_early_init,
- .late_init = NULL,
- .sw_init = isp_sw_init,
- .sw_fini = isp_sw_fini,
.hw_init = isp_hw_init,
.hw_fini = isp_hw_fini,
- .suspend = isp_suspend,
- .resume = isp_resume,
.is_idle = isp_is_idle,
- .wait_for_idle = isp_wait_for_idle,
- .soft_reset = isp_soft_reset,
.set_clockgating_state = isp_set_clockgating_state,
.set_powergating_state = isp_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
index b03664c66dd6..4f3b7b5d9c1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h
@@ -50,6 +50,7 @@ struct amdgpu_isp {
struct mfd_cell *isp_cell;
struct resource *isp_res;
struct resource *isp_i2c_res;
+ struct resource *isp_gpio_res;
struct isp_platform_data *isp_pdata;
unsigned int harvest_config;
const struct firmware *fw;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 16f2605ac50b..ddb9d3269357 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -42,7 +42,7 @@ static void amdgpu_job_do_core_dump(struct amdgpu_device *adev,
for (i = 0; i < adev->num_ip_blocks; i++)
if (adev->ip_blocks[i].version->funcs->dump_ip_state)
adev->ip_blocks[i].version->funcs
- ->dump_ip_state((void *)adev);
+ ->dump_ip_state((void *)&adev->ip_blocks[i]);
dev_info(adev->dev, "Dumping IP State Completed\n");
amdgpu_coredump(adev, true, false, job);
@@ -102,8 +102,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
return DRM_GPU_SCHED_STAT_ENODEV;
}
- adev->job_hang = true;
-
/*
* Do the coredump immediately after a job timeout to get a very
* close dump/snapshot/representation of GPU's current error status
@@ -132,27 +130,48 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
amdgpu_vm_put_task_info(ti);
}
- dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
-
/* attempt a per ring reset */
- if (amdgpu_gpu_recovery &&
- ring->funcs->reset) {
+ if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+ dev_err(adev->dev, "Ring reset disabled by debug mask\n");
+ } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
+ bool is_guilty;
+
+ dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);
/* stop the scheduler, but don't mess with the
* bad job yet because if ring reset fails
* we'll fall back to full GPU reset.
*/
drm_sched_wqueue_stop(&ring->sched);
+
+ /* for engine resets, we need to reset the engine,
+ * but individual queues may be unaffected.
+ * check here to make sure the accounting is correct.
+ */
+ if (ring->funcs->is_guilty)
+ is_guilty = ring->funcs->is_guilty(ring);
+ else
+ is_guilty = true;
+
+ if (is_guilty)
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
+
r = amdgpu_ring_reset(ring, job->vmid);
if (!r) {
if (amdgpu_ring_sched_ready(ring))
drm_sched_stop(&ring->sched, s_job);
- atomic_inc(&ring->adev->gpu_reset_counter);
- amdgpu_fence_driver_force_completion(ring);
+ if (is_guilty) {
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ amdgpu_fence_driver_force_completion(ring);
+ }
if (amdgpu_ring_sched_ready(ring))
- drm_sched_start(&ring->sched);
+ drm_sched_start(&ring->sched, 0);
+ dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name);
+ drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE);
goto exit;
}
+ dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name);
}
+ dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
struct amdgpu_reset_context reset_context;
@@ -179,7 +198,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
}
exit:
- adev->job_hang = false;
drm_dev_exit(idx);
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -195,11 +213,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!*job)
return -ENOMEM;
- /*
- * Initialize the scheduler to at least some ring so that we always
- * have a pointer to adev.
- */
- (*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
amdgpu_sync_create(&(*job)->explicit_sync);
@@ -253,20 +266,19 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
/* Check if any fences where initialized */
if (job->base.s_fence && job->base.s_fence->finished.ops)
f = &job->base.s_fence->finished;
- else if (job->hw_fence.ops)
- f = &job->hw_fence;
+ else if (job->hw_fence.base.ops)
+ f = &job->hw_fence.base;
else
f = NULL;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(ring->adev, &job->ibs[i], f);
+ amdgpu_ib_free(&job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
@@ -278,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->explicit_sync);
/* only put the hw fence if has embedded fence */
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -310,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
if (job->gang_submit != &job->base.s_fence->scheduled)
dma_fence_put(job->gang_submit);
- if (!job->hw_fence.ops)
+ if (!job->hw_fence.base.ops)
kfree(job);
else
- dma_fence_put(&job->hw_fence);
+ dma_fence_put(&job->hw_fence.base);
}
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
@@ -349,25 +361,40 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
int r;
r = drm_sched_entity_error(s_entity);
if (r)
goto error;
- if (!fence && job->gang_submit)
+ if (job->gang_submit) {
fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+ if (fence)
+ return fence;
+ }
+
+ fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+ if (fence)
+ return fence;
- while (!fence && job->vm && !job->vmid) {
+ if (job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r) {
dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
goto error;
}
+ /*
+ * The VM structure might be released after the VMID is
+ * assigned, we had multiple problems with people trying to use
+ * the VM pointer so better set it to NULL.
+ */
+ if (!fence)
+ job->vm = NULL;
+ return fence;
}
- return fence;
+ return NULL;
error:
dma_fence_set_error(&job->base.s_fence->finished, r);
@@ -411,8 +438,24 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
return fence;
}
-#define to_drm_sched_job(sched_job) \
- container_of((sched_job), struct drm_sched_job, queue_node)
+/*
+ * This is a duplicate function from DRM scheduler sched_internal.h.
+ * Plan is to remove it when amdgpu_job_stop_all_jobs_on_sched is removed, due
+ * latter being incorrect and racy.
+ *
+ * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
+ */
+static struct drm_sched_job *
+drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
+{
+ struct spsc_node *node;
+
+ node = spsc_queue_pop(&entity->job_queue);
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct drm_sched_job, queue_node);
+}
void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
{
@@ -425,7 +468,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
- while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
+ while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
struct drm_sched_fence *s_fence = s_job->s_fence;
dma_fence_signal(&s_fence->scheduled);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index ce6b9ba967ff..931fed8892cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -48,7 +48,7 @@ struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync explicit_sync;
- struct dma_fence hw_fence;
+ struct amdgpu_fence hw_fence;
struct dma_fence *gang_submit;
uint32_t preamble_status;
uint32_t preemption_status;
@@ -78,6 +78,7 @@ struct amdgpu_job {
/* enforce isolation */
bool enforce_isolation;
+ bool run_cleaner_shader;
uint32_t num_ibs;
struct amdgpu_ib ibs[];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 6df99cb00d9a..dda29132dfb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -33,6 +33,7 @@
#define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000)
static void amdgpu_jpeg_idle_work_handler(struct work_struct *work);
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
{
@@ -47,7 +48,7 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev)
adev->jpeg.indirect_sram = true;
for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
if (adev->jpeg.indirect_sram) {
@@ -73,7 +74,7 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
amdgpu_bo_free_kernel(
@@ -85,6 +86,9 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev)
amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]);
}
+ if (adev->jpeg.reg_list)
+ amdgpu_jpeg_reg_dump_fini(adev);
+
mutex_destroy(&adev->jpeg.jpeg_pg_lock);
return 0;
@@ -110,7 +114,7 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work)
unsigned int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- if (adev->jpeg.harvest_config & (1 << i))
+ if (adev->jpeg.harvest_config & (1U << i))
continue;
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j)
@@ -342,3 +346,193 @@ int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
return psp_execute_ip_fw_load(&adev->psp, &ucode);
}
+
+/*
+ * debugfs for to enable/disable jpeg job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_jpeg_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << (adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings)) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (val & (1 << ((i * adev->jpeg.num_jpeg_rings) + j)))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_jpeg_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i, j;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->sched.ready)
+ mask |= 1ULL << ((i * adev->jpeg.num_jpeg_rings) + j);
+ }
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_jpeg_sched_mask_fops,
+ amdgpu_debugfs_jpeg_sched_mask_get,
+ amdgpu_debugfs_jpeg_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->jpeg.num_jpeg_inst > 1) && !(adev->jpeg.num_jpeg_rings > 1))
+ return;
+ sprintf(name, "amdgpu_jpeg_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_jpeg_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_jpeg_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->jpeg.supported_reset);
+}
+
+static DEVICE_ATTR(jpeg_reset_mask, 0444,
+ amdgpu_get_jpeg_reset_mask, NULL);
+
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->jpeg.num_jpeg_inst) {
+ r = device_create_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->jpeg.num_jpeg_inst)
+ device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
+ }
+}
+
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count)
+{
+ adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
+ sizeof(uint32_t), GFP_KERNEL);
+ if (!adev->jpeg.ip_dump) {
+ DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n");
+ return -ENOMEM;
+ }
+ adev->jpeg.reg_list = reg;
+ adev->jpeg.reg_count = count;
+
+ return 0;
+}
+
+static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->jpeg.ip_dump);
+ adev->jpeg.reg_list = NULL;
+ adev->jpeg.reg_count = 0;
+}
+
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, inst_id, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ inst_id = GET_INST(JPEG, i);
+ inst_off = i * adev->jpeg.reg_count;
+ /* check power status from UVD_JPEG_POWER_STATUS */
+ adev->jpeg.ip_dump[inst_off] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
+ inst_id));
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered)
+ for (j = 1; j < adev->jpeg.reg_count; j++)
+ adev->jpeg.ip_dump[inst_off + j] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j],
+ inst_id));
+ }
+}
+
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 inst_off, is_powered;
+ int i, j;
+
+ if (!adev->jpeg.ip_dump)
+ return;
+
+ drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i)) {
+ drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i);
+ continue;
+ }
+
+ inst_off = i * adev->jpeg.reg_count;
+ is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
+
+ if (is_powered) {
+ drm_printf(p, "Active Instance:JPEG%d\n", i);
+ for (j = 0; j < adev->jpeg.reg_count; j++)
+ drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name,
+ adev->jpeg.ip_dump[inst_off + j]);
+ } else
+ drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index f9cdd873ac9b..4f0775e39b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -27,7 +27,8 @@
#include "amdgpu_ras.h"
#define AMDGPU_MAX_JPEG_INSTANCES 4
-#define AMDGPU_MAX_JPEG_RINGS 8
+#define AMDGPU_MAX_JPEG_RINGS 10
+#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
#define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1)
@@ -91,6 +92,14 @@
*adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
} while (0)
+struct amdgpu_hwip_reg_entry;
+
+enum amdgpu_jpeg_caps {
+ AMDGPU_JPEG_RRMT_ENABLED,
+};
+
+#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps)
+
struct amdgpu_jpeg_reg{
unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS];
};
@@ -128,6 +137,11 @@ struct amdgpu_jpeg {
uint16_t inst_mask;
uint8_t num_inst_per_aid;
bool indirect_sram;
+ uint32_t supported_reset;
+ uint32_t caps;
+ u32 *ip_dump;
+ u32 reg_count;
+ const struct amdgpu_hwip_reg_entry *reg_list;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
@@ -149,5 +163,12 @@ int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev,
int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
+ const struct amdgpu_hwip_reg_entry *reg, u32 count);
+void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
+void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p);
#endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 016a6f6c4267..d2ce7d86dbc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -45,6 +45,7 @@
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amd_pcie.h"
+#include "amdgpu_userq.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -370,6 +371,26 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
return 0;
}
+static int amdgpu_userq_metadata_info_gfx(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_uq_metadata_gfx *meta)
+{
+ int ret = -EOPNOTSUPP;
+
+ if (adev->gfx.funcs->get_gfx_shadow_info) {
+ struct amdgpu_gfx_shadow_info shadow = {};
+
+ adev->gfx.funcs->get_gfx_shadow_info(adev, &shadow, true);
+ meta->shadow_size = shadow.shadow_size;
+ meta->shadow_alignment = shadow.shadow_alignment;
+ meta->csa_size = shadow.csa_size;
+ meta->csa_alignment = shadow.csa_alignment;
+ ret = 0;
+ }
+
+ return ret;
+}
+
static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
struct drm_amdgpu_info *info,
struct drm_amdgpu_info_hw_ip *result)
@@ -387,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- if (adev->gfx.gfx_ring[i].sched.ready)
+ if (adev->gfx.gfx_ring[i].sched.ready &&
+ !adev->gfx.gfx_ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -395,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
- if (adev->gfx.compute_ring[i].sched.ready)
+ if (adev->gfx.compute_ring[i].sched.ready &&
+ !adev->gfx.compute_ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 32;
ib_size_alignment = 32;
@@ -403,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
- if (adev->sdma.instance[i].ring.sched.ready)
+ if (adev->sdma.instance[i].ring.sched.ready &&
+ !adev->sdma.instance[i].ring.no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -414,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->uvd.harvest_config & (1 << i))
continue;
- if (adev->uvd.inst[i].ring.sched.ready)
+ if (adev->uvd.inst[i].ring.sched.ready &&
+ !adev->uvd.inst[i].ring.no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -423,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
- if (adev->vce.ring[i].sched.ready)
+ if (adev->vce.ring[i].sched.ready &&
+ !adev->vce.ring[i].no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -435,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
- if (adev->uvd.inst[i].ring_enc[j].sched.ready)
+ if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
+ !adev->uvd.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -447,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (adev->vcn.inst[i].ring_dec.sched.ready)
+ if (adev->vcn.inst[i].ring_dec.sched.ready &&
+ !adev->vcn.inst[i].ring_dec.no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -459,8 +487,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; j++)
- if (adev->vcn.inst[i].ring_enc[j].sched.ready)
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
+ if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
+ !adev->vcn.inst[i].ring_enc[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -475,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
continue;
for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
- if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
+ if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
+ !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
++num_rings;
}
ib_start_alignment = 256;
@@ -483,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
break;
case AMDGPU_HW_IP_VPE:
type = AMD_IP_BLOCK_TYPE_VPE;
- if (adev->vpe.ring.sched.ready)
+ if (adev->vpe.ring.sched.ready &&
+ !adev->vpe.ring.no_user_submission)
++num_rings;
ib_start_alignment = 256;
ib_size_alignment = 4;
@@ -846,7 +877,7 @@ out:
case AMDGPU_INFO_DEV_INFO: {
struct drm_amdgpu_info_device *dev_info;
uint64_t vm_size;
- uint32_t pcie_gen_mask;
+ uint32_t pcie_gen_mask, pcie_width_mask;
dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
if (!dev_info)
@@ -888,6 +919,15 @@ out:
if (adev->gfx.config.ta_cntl2_truncate_coord_mode)
dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
+ if (amdgpu_passthrough(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+ else if (amdgpu_sriov_vf(adev))
+ dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF <<
+ AMDGPU_IDS_FLAGS_MODE_SHIFT) &
+ AMDGPU_IDS_FLAGS_MODE_MASK;
+
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
vm_size -= AMDGPU_VA_RESERVED_TOP;
@@ -934,15 +974,18 @@ out:
dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask;
/* Combine the chip gen mask with the platform (CPU/mobo) mask. */
- pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16);
+ pcie_gen_mask = adev->pm.pcie_gen_mask &
+ (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT);
+ pcie_width_mask = adev->pm.pcie_mlw_mask &
+ (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT);
dev_info->pcie_gen = fls(pcie_gen_mask);
dev_info->pcie_num_lanes =
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
- adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 :
+ pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1;
dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size;
dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp;
@@ -966,6 +1009,8 @@ out:
}
}
+ dev_info->userq_ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+
ret = copy_to_user(out, dev_info,
min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0;
kfree(dev_info);
@@ -1281,6 +1326,22 @@ out:
return copy_to_user(out, &gpuvm_fault,
min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0;
}
+ case AMDGPU_INFO_UQ_FW_AREAS: {
+ struct drm_amdgpu_info_uq_metadata meta_info = {};
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ ret = amdgpu_userq_metadata_info_gfx(adev, info, &meta_info.gfx);
+ if (ret)
+ return ret;
+
+ ret = copy_to_user(out, &meta_info,
+ min((size_t)size, sizeof(meta_info))) ? -EFAULT : 0;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
@@ -1364,6 +1425,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
mutex_init(&fpriv->bo_list_lock);
idr_init_base(&fpriv->bo_list_handles, 1);
+ r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, file_priv, adev);
+ if (r)
+ DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
+
+ r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+ if (r)
+ goto error_vm;
+
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
file_priv->driver_priv = fpriv;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 18ee60378727..3ca03b5e0f91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -348,6 +348,24 @@ static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgp
return ret;
}
+static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
+ struct mca_bank_entry *entry)
+{
+ bool ret;
+
+ switch (type) {
+ case AMDGPU_MCA_ERROR_TYPE_CE:
+ ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]);
+ break;
+ case AMDGPU_MCA_ERROR_TYPE_UE:
+ default:
+ ret = true;
+ break;
+ }
+
+ return ret;
+}
+
static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
struct ras_query_context *qctx)
{
@@ -373,7 +391,8 @@ static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mc
amdgpu_mca_bank_set_add_entry(mca_set, &entry);
- amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
+ if (amdgpu_mca_bank_should_dump(adev, type, &entry))
+ amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 7d4b540340e0..6fa9fa11c8f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
PAGE_SIZE);
}
-static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
- int ip_type, uint64_t *doorbell_index)
-{
- unsigned int offset, found;
- struct amdgpu_mes *mes = &adev->mes;
-
- if (ip_type == AMDGPU_RING_TYPE_SDMA)
- offset = adev->doorbell_index.sdma_engine[0];
- else
- offset = 0;
-
- found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
- if (found >= mes->num_mes_dbs) {
- DRM_WARN("No doorbell available\n");
- return -ENOSPC;
- }
-
- set_bit(found, mes->doorbell_bitmap);
-
- /* Get the absolute doorbell index on BAR */
- *doorbell_index = mes->db_start_dw_offset + found * 2;
- return 0;
-}
-
-static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
- uint32_t doorbell_index)
-{
- unsigned int old, rel_index;
- struct amdgpu_mes *mes = &adev->mes;
-
- /* Find the relative index of the doorbell in this object */
- rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
- old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
- WARN_ON(!old);
-}
-
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
int i;
@@ -104,7 +68,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
return 0;
r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_DOMAIN_VRAM,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
&adev->mes.event_log_cpu_addr);
@@ -126,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
int amdgpu_mes_init(struct amdgpu_device *adev)
{
- int i, r;
+ int i, r, num_pipes;
adev->mes.adev = adev;
@@ -142,27 +106,54 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
- adev->mes.vmid_mask_gfxhub = 0xffffff00;
+ adev->mes.vmid_mask_gfxhub = adev->gfx.disable_kq ? 0xfffffffe : 0xffffff00;
+
+ num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
+ if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
+ dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_GFX_PIPES);
+
+ for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
+ if (i >= num_pipes)
+ break;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
+ IP_VERSION(12, 0, 0))
+ /*
+ * GFX V12 has only one GFX pipe, but 8 queues in it.
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1-7 for MES scheduling
+ * mask = 1111 1110b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
+ else
+ /*
+ * GFX pipe 0 queue 0 is being used by Kernel queue.
+ * Set GFX pipe 0 queue 1 for MES scheduling
+ * mask = 10b
+ */
+ adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
+ }
+
+ num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
+ if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES)
+ dev_warn(adev->dev, "more compute pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
- /* use only 1st MEC pipes */
- if (i >= adev->gfx.mec.num_pipe_per_mec)
- continue;
- adev->mes.compute_hqd_mask[i] = 0xc;
+ if (i >= num_pipes)
+ break;
+ adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF : 0xC;
}
- for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
- adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
+ num_pipes = adev->sdma.num_instances;
+ if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES)
+ dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n",
+ num_pipes, AMDGPU_MES_MAX_SDMA_PIPES);
for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
- if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
- IP_VERSION(6, 0, 0))
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
- /* zero sdma_hqd_mask for non-existent engine */
- else if (adev->sdma.num_instances == 1)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
- else
- adev->mes.sdma_hqd_mask[i] = 0xfc;
+ if (i >= num_pipes)
+ break;
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
}
for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
@@ -192,17 +183,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]];
}
- r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
- if (r) {
- dev_err(adev->dev,
- "(%d) read_val_offs alloc failed\n", r);
- goto error;
- }
- adev->mes.read_val_gpu_addr =
- adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
- adev->mes.read_val_ptr =
- (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
-
r = amdgpu_mes_doorbell_init(adev);
if (r)
goto error;
@@ -223,8 +203,6 @@ error:
amdgpu_device_wb_free(adev,
adev->mes.query_status_fence_offs[i]);
}
- if (adev->mes.read_val_ptr)
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
@@ -249,8 +227,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
amdgpu_device_wb_free(adev,
adev->mes.query_status_fence_offs[i]);
}
- if (adev->mes.read_val_ptr)
- amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
amdgpu_mes_doorbell_free(adev);
@@ -261,244 +237,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->mes.mutex_hidden);
}
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
-}
-
-int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
- struct amdgpu_vm *vm)
-{
- struct amdgpu_mes_process *process;
- int r;
-
- /* allocate the mes process buffer */
- process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
- if (!process) {
- DRM_ERROR("no more memory to create mes process\n");
- return -ENOMEM;
- }
-
- /* allocate the process context bo and map it */
- r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
- if (r) {
- DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_memory;
- }
- memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_ctx;
- }
-
- INIT_LIST_HEAD(&process->gang_list);
- process->vm = vm;
- process->pasid = pasid;
- process->process_quantum = adev->mes.default_process_quantum;
- process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
-
- amdgpu_mes_unlock(&adev->mes);
- return 0;
-
-clean_up_ctx:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_bo_free_kernel(&process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
-clean_up_memory:
- kfree(process);
- return r;
-}
-
-void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
-{
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang, *tmp1;
- struct amdgpu_mes_queue *queue, *tmp2;
- struct mes_remove_queue_input queue_input;
- unsigned long flags;
- int r;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_WARN("pasid %d doesn't exist\n", pasid);
- amdgpu_mes_unlock(&adev->mes);
- return;
- }
-
- /* Remove all queues from hardware */
- list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-
- queue_input.doorbell_offset = queue->doorbell_off;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
-
- r = adev->mes.funcs->remove_hw_queue(&adev->mes,
- &queue_input);
- if (r)
- DRM_WARN("failed to remove hardware queue\n");
- }
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
- }
-
- idr_remove(&adev->mes.pasid_idr, pasid);
- amdgpu_mes_unlock(&adev->mes);
-
- /* free all memory allocated by the process */
- list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
- list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
- amdgpu_mes_queue_free_mqd(queue);
- list_del(&queue->list);
- kfree(queue);
- }
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
- kfree(gang);
-
- }
- amdgpu_bo_free_kernel(&process->proc_ctx_bo,
- &process->proc_ctx_gpu_addr,
- &process->proc_ctx_cpu_ptr);
- kfree(process);
-}
-
-int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
- struct amdgpu_mes_gang_properties *gprops,
- int *gang_id)
-{
- struct amdgpu_mes_process *process;
- struct amdgpu_mes_gang *gang;
- int r;
-
- /* allocate the mes gang buffer */
- gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
- if (!gang) {
- return -ENOMEM;
- }
-
- /* allocate the gang context bo and map it to cpu space */
- r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
- if (r) {
- DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_mem;
- }
- memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- r = -EINVAL;
- goto clean_up_ctx;
- }
-
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to allocate idr for gang\n");
- goto clean_up_ctx;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
- INIT_LIST_HEAD(&gang->queue_list);
- gang->process = process;
- gang->priority = gprops->priority;
- gang->gang_quantum = gprops->gang_quantum ?
- gprops->gang_quantum : adev->mes.default_gang_quantum;
- gang->global_priority_level = gprops->global_priority_level;
- gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
- list_add_tail(&gang->list, &process->gang_list);
-
- amdgpu_mes_unlock(&adev->mes);
- return 0;
-
-clean_up_ctx:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
-clean_up_mem:
- kfree(gang);
- return r;
-}
-
-int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
-{
- struct amdgpu_mes_gang *gang;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- amdgpu_mes_unlock(&adev->mes);
- return -EINVAL;
- }
-
- if (!list_empty(&gang->queue_list)) {
- DRM_ERROR("queue list is not empty\n");
- amdgpu_mes_unlock(&adev->mes);
- return -EBUSY;
- }
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
- list_del(&gang->list);
- amdgpu_mes_unlock(&adev->mes);
-
- amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
- &gang->gang_ctx_gpu_addr,
- &gang->gang_ctx_cpu_ptr);
-
- kfree(gang);
-
- return 0;
-}
-
int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
struct mes_suspend_gang_input input;
@@ -547,304 +285,6 @@ int amdgpu_mes_resume(struct amdgpu_device *adev)
return r;
}
-static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
- struct amdgpu_mes_queue *q,
- struct amdgpu_mes_queue_properties *p)
-{
- struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
- u32 mqd_size = mqd_mgr->mqd_size;
- int r;
-
- r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT,
- &q->mqd_obj,
- &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
- if (r) {
- dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
- return r;
- }
- memset(q->mqd_cpu_ptr, 0, mqd_size);
-
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
-
- return 0;
-
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
- return r;
-}
-
-static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
- struct amdgpu_mes_queue *q,
- struct amdgpu_mes_queue_properties *p)
-{
- struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
- struct amdgpu_mqd_prop mqd_prop = {0};
-
- mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
- mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
- mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
- mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
- mqd_prop.queue_size = p->queue_size;
- mqd_prop.use_doorbell = true;
- mqd_prop.doorbell_index = p->doorbell_off;
- mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
- mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
- mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
- mqd_prop.hqd_active = false;
-
- if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
- p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
- mutex_lock(&adev->srbm_mutex);
- amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
- }
-
- mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
-
- if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
- p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
- amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
- }
-
- amdgpu_bo_unreserve(q->mqd_obj);
-}
-
-int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
- struct amdgpu_mes_queue_properties *qprops,
- int *queue_id)
-{
- struct amdgpu_mes_queue *queue;
- struct amdgpu_mes_gang *gang;
- struct mes_add_queue_input queue_input;
- unsigned long flags;
- int r;
-
- memset(&queue_input, 0, sizeof(struct mes_add_queue_input));
-
- /* allocate the mes queue buffer */
- queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
- if (!queue) {
- DRM_ERROR("Failed to allocate memory for queue\n");
- return -ENOMEM;
- }
-
- /* Allocate the queue mqd */
- r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_memory;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- r = -EINVAL;
- goto clean_up_mqd;
- }
-
- /* add the mes gang to idr list */
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
- GFP_ATOMIC);
- if (r < 0) {
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_mqd;
- }
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- *queue_id = queue->queue_id = r;
-
- /* allocate a doorbell index for the queue */
- r = amdgpu_mes_kernel_doorbell_get(adev,
- qprops->queue_type,
- &qprops->doorbell_off);
- if (r)
- goto clean_up_queue_id;
-
- /* initialize the queue mqd */
- amdgpu_mes_queue_init_mqd(adev, queue, qprops);
-
- /* add hw queue to mes */
- queue_input.process_id = gang->process->pasid;
-
- queue_input.page_table_base_addr =
- adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
- adev->gmc.vram_start;
-
- queue_input.process_va_start = 0;
- queue_input.process_va_end =
- (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
- queue_input.process_quantum = gang->process->process_quantum;
- queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
- queue_input.gang_quantum = gang->gang_quantum;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
- queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
- queue_input.gang_global_priority_level = gang->global_priority_level;
- queue_input.doorbell_offset = qprops->doorbell_off;
- queue_input.mqd_addr = queue->mqd_gpu_addr;
- queue_input.wptr_addr = qprops->wptr_gpu_addr;
- queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
- queue_input.queue_type = qprops->queue_type;
- queue_input.paging = qprops->paging;
- queue_input.is_kfd_process = 0;
-
- r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
- if (r) {
- DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
- qprops->doorbell_off);
- goto clean_up_doorbell;
- }
-
- DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
- "queue type=%d, doorbell=0x%llx\n",
- gang->process->pasid, gang_id, qprops->queue_type,
- qprops->doorbell_off);
-
- queue->ring = qprops->ring;
- queue->doorbell_off = qprops->doorbell_off;
- queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
- queue->queue_type = qprops->queue_type;
- queue->paging = qprops->paging;
- queue->gang = gang;
- queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
- list_add_tail(&queue->list, &gang->queue_list);
-
- amdgpu_mes_unlock(&adev->mes);
- return 0;
-
-clean_up_doorbell:
- amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off);
-clean_up_queue_id:
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
- idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-clean_up_mqd:
- amdgpu_mes_unlock(&adev->mes);
- amdgpu_mes_queue_free_mqd(queue);
-clean_up_memory:
- kfree(queue);
- return r;
-}
-
-int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
-{
- unsigned long flags;
- struct amdgpu_mes_queue *queue;
- struct amdgpu_mes_gang *gang;
- struct mes_remove_queue_input queue_input;
- int r;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- /* remove the mes gang from idr list */
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
-
- queue = idr_find(&adev->mes.queue_id_idr, queue_id);
- if (!queue) {
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- amdgpu_mes_unlock(&adev->mes);
- DRM_ERROR("queue id %d doesn't exist\n", queue_id);
- return -EINVAL;
- }
-
- idr_remove(&adev->mes.queue_id_idr, queue_id);
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-
- DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
- queue->doorbell_off);
-
- gang = queue->gang;
- queue_input.doorbell_offset = queue->doorbell_off;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
-
- r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
- if (r)
- DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
- queue_id);
-
- list_del(&queue->list);
- amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off);
- amdgpu_mes_unlock(&adev->mes);
-
- amdgpu_mes_queue_free_mqd(queue);
- kfree(queue);
- return 0;
-}
-
-int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id)
-{
- unsigned long flags;
- struct amdgpu_mes_queue *queue;
- struct amdgpu_mes_gang *gang;
- struct mes_reset_queue_input queue_input;
- int r;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
-
- /* remove the mes gang from idr list */
- spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
-
- queue = idr_find(&adev->mes.queue_id_idr, queue_id);
- if (!queue) {
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- amdgpu_mes_unlock(&adev->mes);
- DRM_ERROR("queue id %d doesn't exist\n", queue_id);
- return -EINVAL;
- }
- spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-
- DRM_DEBUG("try to reset queue, doorbell off = 0x%llx\n",
- queue->doorbell_off);
-
- gang = queue->gang;
- queue_input.doorbell_offset = queue->doorbell_off;
- queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
-
- r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
- if (r)
- DRM_ERROR("failed to reset hardware queue, queue id = %d\n",
- queue_id);
-
- amdgpu_mes_unlock(&adev->mes);
-
- return 0;
-}
-
-int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
- int me_id, int pipe_id, int queue_id, int vmid)
-{
- struct mes_reset_queue_input queue_input;
- int r;
-
- queue_input.queue_type = queue_type;
- queue_input.use_mmio = true;
- queue_input.me_id = me_id;
- queue_input.pipe_id = pipe_id;
- queue_input.queue_id = queue_id;
- queue_input.vmid = vmid;
- r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
- if (r)
- DRM_ERROR("failed to reset hardware queue by mmio, queue id = %d\n",
- queue_id);
- return r;
-}
-
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -860,7 +300,9 @@ int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
queue_input.wptr_addr = ring->wptr_gpu_addr;
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to map legacy queue\n");
@@ -883,7 +325,9 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
queue_input.trail_fence_addr = gpu_addr;
queue_input.trail_fence_data = seq;
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
@@ -895,7 +339,7 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
unsigned int vmid,
bool use_mmio)
{
- struct mes_reset_legacy_queue_input queue_input;
+ struct mes_reset_queue_input queue_input;
int r;
memset(&queue_input, 0, sizeof(queue_input));
@@ -905,12 +349,17 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
queue_input.me_id = ring->me;
queue_input.pipe_id = ring->pipe;
queue_input.queue_id = ring->queue;
- queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ queue_input.mqd_addr = ring->mqd_obj ? amdgpu_bo_gpu_offset(ring->mqd_obj) : 0;
queue_input.wptr_addr = ring->wptr_gpu_addr;
queue_input.vmid = vmid;
queue_input.use_mmio = use_mmio;
+ queue_input.is_kq = true;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
+ queue_input.legacy_gfx = true;
- r = adev->mes.funcs->reset_legacy_queue(&adev->mes, &queue_input);
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
DRM_ERROR("failed to reset legacy queue\n");
@@ -921,23 +370,36 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
struct mes_misc_op_input op_input;
int r, val = 0;
+ uint32_t addr_offset = 0;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+ if (amdgpu_device_wb_get(adev, &addr_offset)) {
+ dev_err(adev->dev, "critical bug! too many mes readers\n");
+ goto error;
+ }
+ read_val_gpu_addr = adev->wb.gpu_addr + (addr_offset * 4);
+ read_val_ptr = (uint32_t *)&adev->wb.wb[addr_offset];
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
- op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
+ op_input.read_reg.buffer_addr = read_val_gpu_addr;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes rreg is not supported!\n");
+ dev_err(adev->dev, "mes rreg is not supported!\n");
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to read reg (0x%x)\n", reg);
+ dev_err(adev->dev, "failed to read reg (0x%x)\n", reg);
else
- val = *(adev->mes.read_val_ptr);
+ val = *(read_val_ptr);
error:
+ if (addr_offset)
+ amdgpu_device_wb_free(adev, addr_offset);
return val;
}
@@ -952,14 +414,16 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
op_input.write_reg.reg_value = val;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes wreg is not supported!\n");
+ dev_err(adev->dev, "mes wreg is not supported!\n");
r = -EINVAL;
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to write reg (0x%x)\n", reg);
+ dev_err(adev->dev, "failed to write reg (0x%x)\n", reg);
error:
return r;
@@ -979,39 +443,16 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
op_input.wrm_reg.mask = mask;
if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
- r = -EINVAL;
- goto error;
- }
-
- r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
- if (r)
- DRM_ERROR("failed to reg_write_reg_wait\n");
-
-error:
- return r;
-}
-
-int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
- uint32_t val, uint32_t mask)
-{
- struct mes_misc_op_input op_input;
- int r;
-
- op_input.op = MES_MISC_OP_WRM_REG_WAIT;
- op_input.wrm_reg.reg0 = reg;
- op_input.wrm_reg.ref = val;
- op_input.wrm_reg.mask = mask;
-
- if (!adev->mes.funcs->misc_op) {
- DRM_ERROR("mes reg wait is not supported!\n");
+ dev_err(adev->dev, "mes reg_write_reg_wait is not supported!\n");
r = -EINVAL;
goto error;
}
+ amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
if (r)
- DRM_ERROR("failed to reg_write_reg_wait\n");
+ dev_err(adev->dev, "failed to reg_write_reg_wait\n");
error:
return r;
@@ -1085,508 +526,12 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
return r;
}
-static void
-amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_mes_queue_properties *props)
-{
- props->queue_type = ring->funcs->type;
- props->hqd_base_gpu_addr = ring->gpu_addr;
- props->rptr_gpu_addr = ring->rptr_gpu_addr;
- props->wptr_gpu_addr = ring->wptr_gpu_addr;
- props->wptr_mc_addr =
- ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
- props->queue_size = ring->ring_size;
- props->eop_gpu_addr = ring->eop_gpu_addr;
- props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
- props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
- props->paging = false;
- props->ring = ring;
-}
-
-#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \
-do { \
- if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].slots[id_offs]); \
- else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].ring); \
- else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].ib); \
- else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \
- return offsetof(struct amdgpu_mes_ctx_meta_data, \
- _eng[ring->idx].padding); \
-} while(0)
-
-int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
-{
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
- break;
- case AMDGPU_RING_TYPE_SDMA:
- DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
- break;
- default:
- break;
- }
-
- WARN_ON(1);
- return -EINVAL;
-}
-
-int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
- int queue_type, int idx,
- struct amdgpu_mes_ctx_data *ctx_data,
- struct amdgpu_ring **out)
-{
- struct amdgpu_ring *ring;
- struct amdgpu_mes_gang *gang;
- struct amdgpu_mes_queue_properties qprops = {0};
- int r, queue_id, pasid;
-
- /*
- * Avoid taking any other locks under MES lock to avoid circular
- * lock dependencies.
- */
- amdgpu_mes_lock(&adev->mes);
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- amdgpu_mes_unlock(&adev->mes);
- return -EINVAL;
- }
- pasid = gang->process->pasid;
-
- ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
- if (!ring) {
- amdgpu_mes_unlock(&adev->mes);
- return -ENOMEM;
- }
-
- ring->ring_obj = NULL;
- ring->use_doorbell = true;
- ring->is_mes_queue = true;
- ring->mes_ctx = ctx_data;
- ring->idx = idx;
- ring->no_scheduler = true;
-
- if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
- int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- compute[ring->idx].mec_hpd);
- ring->eop_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- }
-
- switch (queue_type) {
- case AMDGPU_RING_TYPE_GFX:
- ring->funcs = adev->gfx.gfx_ring[0].funcs;
- ring->me = adev->gfx.gfx_ring[0].me;
- ring->pipe = adev->gfx.gfx_ring[0].pipe;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- ring->funcs = adev->gfx.compute_ring[0].funcs;
- ring->me = adev->gfx.compute_ring[0].me;
- ring->pipe = adev->gfx.compute_ring[0].pipe;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- ring->funcs = adev->sdma.instance[0].ring.funcs;
- break;
- default:
- BUG();
- }
-
- r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- if (r) {
- amdgpu_mes_unlock(&adev->mes);
- goto clean_up_memory;
- }
-
- amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
-
- dma_fence_wait(gang->process->vm->last_update, false);
- dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
- amdgpu_mes_unlock(&adev->mes);
-
- r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
- if (r)
- goto clean_up_ring;
-
- ring->hw_queue_id = queue_id;
- ring->doorbell_index = qprops.doorbell_off;
-
- if (queue_type == AMDGPU_RING_TYPE_GFX)
- sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
- else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
- sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
- queue_id);
- else if (queue_type == AMDGPU_RING_TYPE_SDMA)
- sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
- queue_id);
- else
- BUG();
-
- *out = ring;
- return 0;
-
-clean_up_ring:
- amdgpu_ring_fini(ring);
-clean_up_memory:
- kfree(ring);
- return r;
-}
-
-void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring)
- return;
-
- amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
- del_timer_sync(&ring->fence_drv.fallback_timer);
- amdgpu_ring_fini(ring);
- kfree(ring);
-}
-
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio)
{
return adev->mes.aggregated_doorbells[prio];
}
-int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- int r;
-
- r = amdgpu_bo_create_kernel(adev,
- sizeof(struct amdgpu_mes_ctx_meta_data),
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &ctx_data->meta_data_obj,
- &ctx_data->meta_data_mc_addr,
- &ctx_data->meta_data_ptr);
- if (r) {
- dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
- return r;
- }
-
- if (!ctx_data->meta_data_obj)
- return -ENOMEM;
-
- memset(ctx_data->meta_data_ptr, 0,
- sizeof(struct amdgpu_mes_ctx_meta_data));
-
- return 0;
-}
-
-void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
-{
- if (ctx_data->meta_data_obj)
- amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
- &ctx_data->meta_data_mc_addr,
- &ctx_data->meta_data_ptr);
-}
-
-int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_bo_va *bo_va;
- struct amdgpu_sync sync;
- struct drm_exec exec;
- int r;
-
- amdgpu_sync_create(&sync);
-
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec,
- &ctx_data->meta_data_obj->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
- }
-
- bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
- if (!bo_va) {
- DRM_ERROR("failed to create bo_va for meta data BO\n");
- r = -ENOMEM;
- goto error_fini_exec;
- }
-
- r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
- sizeof(struct amdgpu_mes_ctx_meta_data),
- AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
- AMDGPU_PTE_EXECUTABLE);
-
- if (r) {
- DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
- goto error_del_bo_va;
- }
-
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r) {
- DRM_ERROR("failed to do vm_bo_update on meta data\n");
- goto error_del_bo_va;
- }
- amdgpu_sync_fence(&sync, bo_va->last_pt_update);
-
- r = amdgpu_vm_update_pdes(adev, vm, false);
- if (r) {
- DRM_ERROR("failed to update pdes on meta data\n");
- goto error_del_bo_va;
- }
- amdgpu_sync_fence(&sync, vm->last_update);
-
- amdgpu_sync_wait(&sync, false);
- drm_exec_fini(&exec);
-
- amdgpu_sync_free(&sync);
- ctx_data->meta_data_va = bo_va;
- return 0;
-
-error_del_bo_va:
- amdgpu_vm_bo_del(adev, bo_va);
-
-error_fini_exec:
- drm_exec_fini(&exec);
- amdgpu_sync_free(&sync);
- return r;
-}
-
-int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
- struct amdgpu_bo *bo = ctx_data->meta_data_obj;
- struct amdgpu_vm *vm = bo_va->base.vm;
- struct dma_fence *fence;
- struct drm_exec exec;
- long r;
-
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec,
- &ctx_data->meta_data_obj->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
- }
-
- amdgpu_vm_bo_del(adev, bo_va);
- if (!amdgpu_vm_ready(vm))
- goto out_unlock;
-
- r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
- &fence);
- if (r)
- goto out_unlock;
- if (fence) {
- amdgpu_bo_fence(bo, fence, true);
- fence = NULL;
- }
-
- r = amdgpu_vm_clear_freed(adev, vm, &fence);
- if (r || !fence)
- goto out_unlock;
-
- dma_fence_wait(fence, false);
- amdgpu_bo_fence(bo, fence, true);
- dma_fence_put(fence);
-
-out_unlock:
- if (unlikely(r < 0))
- dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
- drm_exec_fini(&exec);
-
- return r;
-}
-
-static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
- int pasid, int *gang_id,
- int queue_type, int num_queue,
- struct amdgpu_ring **added_rings,
- struct amdgpu_mes_ctx_data *ctx_data)
-{
- struct amdgpu_ring *ring;
- struct amdgpu_mes_gang_properties gprops = {0};
- int r, j;
-
- /* create a gang for the process */
- gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.gang_quantum = adev->mes.default_gang_quantum;
- gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
- gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
-
- r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
- if (r) {
- DRM_ERROR("failed to add gang\n");
- return r;
- }
-
- /* create queues for the gang */
- for (j = 0; j < num_queue; j++) {
- r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
- ctx_data, &ring);
- if (r) {
- DRM_ERROR("failed to add ring\n");
- break;
- }
-
- DRM_INFO("ring %s was added\n", ring->name);
- added_rings[j] = ring;
- }
-
- return 0;
-}
-
-static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
-{
- struct amdgpu_ring *ring;
- int i, r;
-
- for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
- ring = added_rings[i];
- if (!ring)
- continue;
-
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
-
- r = amdgpu_ring_test_ib(ring, 1000 * 10);
- if (r) {
- DRM_DEV_ERROR(ring->adev->dev,
- "ring %s ib test failed (%d)\n",
- ring->name, r);
- return r;
- } else
- DRM_INFO("ring %s ib test pass\n", ring->name);
- }
-
- return 0;
-}
-
-int amdgpu_mes_self_test(struct amdgpu_device *adev)
-{
- struct amdgpu_vm *vm = NULL;
- struct amdgpu_mes_ctx_data ctx_data = {0};
- struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
- int gang_ids[3] = {0};
- int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
- { AMDGPU_RING_TYPE_COMPUTE, 1 },
- { AMDGPU_RING_TYPE_SDMA, 1} };
- int i, r, pasid, k = 0;
-
- pasid = amdgpu_pasid_alloc(16);
- if (pasid < 0) {
- dev_warn(adev->dev, "No more PASIDs available!");
- pasid = 0;
- }
-
- vm = kzalloc(sizeof(*vm), GFP_KERNEL);
- if (!vm) {
- r = -ENOMEM;
- goto error_pasid;
- }
-
- r = amdgpu_vm_init(adev, vm, -1);
- if (r) {
- DRM_ERROR("failed to initialize vm\n");
- goto error_pasid;
- }
-
- r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
- if (r) {
- DRM_ERROR("failed to alloc ctx meta data\n");
- goto error_fini;
- }
-
- ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
- r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
- if (r) {
- DRM_ERROR("failed to map ctx meta data\n");
- goto error_vm;
- }
-
- r = amdgpu_mes_create_process(adev, pasid, vm);
- if (r) {
- DRM_ERROR("failed to create MES process\n");
- goto error_vm;
- }
-
- for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On GFX v10.3, fw hasn't supported to map sdma queue. */
- if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
- IP_VERSION(10, 3, 0) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) <
- IP_VERSION(11, 0, 0) &&
- queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
- continue;
-
- r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
- &gang_ids[i],
- queue_types[i][0],
- queue_types[i][1],
- &added_rings[k],
- &ctx_data);
- if (r)
- goto error_queues;
-
- k += queue_types[i][1];
- }
-
- /* start ring test and ib test for MES queues */
- amdgpu_mes_test_queues(added_rings);
-
-error_queues:
- /* remove all queues */
- for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
- if (!added_rings[i])
- continue;
- amdgpu_mes_remove_ring(adev, added_rings[i]);
- }
-
- for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
- if (!gang_ids[i])
- continue;
- amdgpu_mes_remove_gang(adev, gang_ids[i]);
- }
-
- amdgpu_mes_destroy_process(adev, pasid);
-
-error_vm:
- amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
-
-error_fini:
- amdgpu_vm_fini(adev, vm);
-
-error_pasid:
- if (pasid)
- amdgpu_pasid_free(pasid);
-
- amdgpu_mes_ctx_free_meta_data(&ctx_data);
- kfree(vm);
- return 0;
-}
-
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -1594,6 +539,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
char ucode_prefix[30];
char fw_name[50];
bool need_retry = false;
+ u32 *ucode_ptr;
int r;
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
@@ -1613,10 +559,12 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
}
- r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED,
+ "%s", fw_name);
if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix);
r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mes.bin", ucode_prefix);
}
@@ -1631,6 +579,10 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
adev->mes.data_start_addr[pipe] =
le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+ ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data +
+ sizeof(union amdgpu_firmware_header));
+ adev->mes.fw_version[pipe] =
+ le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
int ucode, ucode_data;
@@ -1677,6 +629,49 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev)
return is_supported;
}
+/* Fix me -- node_id is used to identify the correct MES instances in the future */
+static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev,
+ uint32_t node_id, bool enable)
+{
+ struct mes_misc_op_input op_input = {0};
+ int r;
+
+ op_input.op = MES_MISC_OP_CHANGE_CONFIG;
+ op_input.change_config.option.limit_single_process = enable ? 1 : 0;
+
+ if (!adev->mes.funcs->misc_op) {
+ dev_err(adev->dev, "mes change config is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ dev_err(adev->dev, "failed to change_config.\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev)
+{
+ int i, r = 0;
+
+ if (adev->enable_mes && adev->gfx.enable_cleaner_shader) {
+ mutex_lock(&adev->enforce_isolation_mutex);
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ if (adev->enforce_isolation[i] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, true);
+ else
+ r |= amdgpu_mes_set_enforce_isolation(adev, i, false);
+ }
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
+ return r;
+}
+
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 96788c0f42f1..c0d2c195fe2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -40,6 +40,7 @@
#define AMDGPU_MES_VERSION_MASK 0x00000fff
#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+#define AMDGPU_MES_MSCRATCH_SIZE 0x40000
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
@@ -55,7 +56,7 @@ enum amdgpu_mes_priority_level {
struct amdgpu_mes_funcs;
-enum admgpu_mes_pipe {
+enum amdgpu_mes_pipe {
AMDGPU_MES_SCHED_PIPE = 0,
AMDGPU_MES_KIQ_PIPE,
AMDGPU_MAX_MES_PIPES = 2,
@@ -75,6 +76,7 @@ struct amdgpu_mes {
uint32_t sched_version;
uint32_t kiq_version;
+ uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
bool enable_legacy_queue_map;
uint32_t total_max_queue;
@@ -109,8 +111,8 @@ struct amdgpu_mes {
uint32_t vmid_mask_gfxhub;
uint32_t vmid_mask_mmhub;
- uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
+ uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
@@ -119,9 +121,6 @@ struct amdgpu_mes {
uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
- uint32_t read_val_offs;
- uint64_t read_val_gpu_addr;
- uint32_t *read_val_ptr;
uint32_t saved_flags;
@@ -144,23 +143,10 @@ struct amdgpu_mes {
const struct amdgpu_mes_funcs *funcs;
/* mes resource_1 bo*/
- struct amdgpu_bo *resource_1;
- uint64_t resource_1_gpu_addr;
- void *resource_1_addr;
-
-};
+ struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
+ uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
+ void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
-struct amdgpu_mes_process {
- int pasid;
- struct amdgpu_vm *vm;
- uint64_t pd_gpu_addr;
- struct amdgpu_bo *proc_ctx_bo;
- uint64_t proc_ctx_gpu_addr;
- void *proc_ctx_cpu_ptr;
- uint64_t process_quantum;
- struct list_head gang_list;
- uint32_t doorbell_index;
- struct mutex doorbell_lock;
};
struct amdgpu_mes_gang {
@@ -249,18 +235,6 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
-struct mes_reset_queue_input {
- uint32_t doorbell_offset;
- uint64_t gang_context_addr;
- bool use_mmio;
- uint32_t queue_type;
- uint32_t me_id;
- uint32_t pipe_id;
- uint32_t queue_id;
- uint32_t xcc_id;
- uint32_t vmid;
-};
-
struct mes_map_legacy_queue_input {
uint32_t queue_type;
uint32_t doorbell_offset;
@@ -292,7 +266,7 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
-struct mes_reset_legacy_queue_input {
+struct mes_reset_queue_input {
uint32_t queue_type;
uint32_t doorbell_offset;
bool use_mmio;
@@ -302,6 +276,8 @@ struct mes_reset_legacy_queue_input {
uint64_t mqd_addr;
uint64_t wptr_addr;
uint32_t vmid;
+ bool legacy_gfx;
+ bool is_kq;
};
enum mes_misc_opcode {
@@ -310,6 +286,7 @@ enum mes_misc_opcode {
MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT,
MES_MISC_OP_SET_SHADER_DEBUGGER,
+ MES_MISC_OP_CHANGE_CONFIG,
};
struct mes_misc_op_input {
@@ -348,6 +325,21 @@ struct mes_misc_op_input {
uint32_t tcp_watch_cntl[4];
uint32_t trap_en;
} set_shader_debugger;
+
+ struct {
+ union {
+ struct {
+ uint32_t limit_single_process : 1;
+ uint32_t enable_hws_logging_buffer : 1;
+ uint32_t reserved : 30;
+ };
+ uint32_t all;
+ } option;
+ struct {
+ uint32_t tdr_level;
+ uint32_t tdr_delay;
+ } tdr_config;
+ } change_config;
};
};
@@ -373,9 +365,6 @@ struct amdgpu_mes_funcs {
int (*misc_op)(struct amdgpu_mes *mes,
struct mes_misc_op_input *input);
- int (*reset_legacy_queue)(struct amdgpu_mes *mes,
- struct mes_reset_legacy_queue_input *input);
-
int (*reset_hw_queue)(struct amdgpu_mes *mes,
struct mes_reset_queue_input *input);
};
@@ -383,32 +372,13 @@ struct amdgpu_mes_funcs {
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
-int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
-
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);
-int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
- struct amdgpu_vm *vm);
-void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
-
-int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
- struct amdgpu_mes_gang_properties *gprops,
- int *gang_id);
-int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
-
int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);
-int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
- struct amdgpu_mes_queue_properties *qprops,
- int *queue_id);
-int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
-int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id);
-int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
- int me_id, int pipe_id, int queue_id, int vmid);
-
int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
@@ -423,8 +393,6 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
uint32_t reg, uint32_t val);
-int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
- uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t reg0, uint32_t reg1,
uint32_t ref, uint32_t mask);
@@ -436,27 +404,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
bool trap_en);
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
uint64_t process_context_addr);
-int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
- int queue_type, int idx,
- struct amdgpu_mes_ctx_data *ctx_data,
- struct amdgpu_ring **out);
-void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring);
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio);
-int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data);
-void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
-int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_mes_ctx_data *ctx_data);
-int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
- struct amdgpu_mes_ctx_data *ctx_data);
-
-int amdgpu_mes_self_test(struct amdgpu_device *adev);
-
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
/*
@@ -518,4 +469,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
}
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
+
+int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
+
#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 5e3faefc5510..6da4f946cac0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter {
struct i2c_adapter base;
struct ddc_service *ddc_service;
+ bool oem;
};
#define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index f61d117b0caf..79c2f807b9fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -101,6 +101,7 @@ struct amdgpu_nbio_funcs {
int (*get_compute_partition_mode)(struct amdgpu_device *adev);
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
u32 *supp_modes);
+ bool (*is_nps_switch_requested)(struct amdgpu_device *adev);
u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
void (*set_reg_remap)(struct amdgpu_device *adev);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 44819cdba7fb..73403744331a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -40,6 +40,8 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_vram_mgr.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_dma_buf.h"
/**
* DOC: amdgpu_object
@@ -161,7 +163,8 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
* When GTT is just an alternative to VRAM make sure that we
* only use it as fallback and still try to fill up VRAM first.
*/
- if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ if (abo->tbo.resource && !(adev->flags & AMD_IS_APU) &&
+ domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
@@ -322,6 +325,9 @@ error_free:
*
* Allocates and pins a BO for kernel internal use.
*
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to create and access the kernel BO.
+ *
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns:
@@ -345,6 +351,76 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
return 0;
}
+EXPORT_SYMBOL(amdgpu_bo_create_kernel);
+
+/**
+ * amdgpu_bo_create_isp_user - create user BO for isp
+ *
+ * @adev: amdgpu device object
+ * @dma_buf: DMABUF handle for isp buffer
+ * @domain: where to place it
+ * @bo: used to initialize BOs in structures
+ * @gpu_addr: GPU addr of the pinned BO
+ *
+ * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does
+ * GART alloc to generate gpu_addr for BO to make it accessible through the
+ * GART aperture for ISP HW.
+ *
+ * This function is exported to allow the V4L2 isp device external to drm device
+ * to create and access the isp user BO.
+ *
+ * Returns:
+ * 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo,
+ u64 *gpu_addr)
+
+{
+ struct drm_gem_object *gem_obj;
+ int r;
+
+ gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf);
+ *bo = gem_to_amdgpu_bo(gem_obj);
+ if (!(*bo)) {
+ dev_err(adev->dev, "failed to get valid isp user bo\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_bo_reserve(*bo, false);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_pin(*bo, domain);
+ if (r) {
+ dev_err(adev->dev, "(%d) isp user bo pin failed\n", r);
+ goto error_unreserve;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(*bo)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo);
+ goto error_unpin;
+ }
+
+ if (!WARN_ON(!gpu_addr))
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo);
+
+ amdgpu_bo_unreserve(*bo);
+
+ return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(*bo);
+error_unreserve:
+ amdgpu_bo_unreserve(*bo);
+ amdgpu_bo_unref(bo);
+
+ return r;
+}
+EXPORT_SYMBOL(amdgpu_bo_create_isp_user);
/**
* amdgpu_bo_create_kernel_at - create BO for kernel use at specific location
@@ -421,6 +497,9 @@ error:
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpin a BO for kernel internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the kernel BO.
*/
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr)
@@ -445,6 +524,30 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
if (cpu_addr)
*cpu_addr = NULL;
}
+EXPORT_SYMBOL(amdgpu_bo_free_kernel);
+
+/**
+ * amdgpu_bo_free_isp_user - free BO for isp use
+ *
+ * @bo: amdgpu isp user BO to free
+ *
+ * unpin and unref BO for isp internal use.
+ *
+ * This function is exported to allow the V4L2 isp device
+ * external to drm device to free the isp user BO.
+ */
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo)
+{
+ if (bo == NULL)
+ return;
+
+ if (amdgpu_bo_reserve(bo, true) == 0) {
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ }
+ amdgpu_bo_unref(&bo);
+}
+EXPORT_SYMBOL(amdgpu_bo_free_isp_user);
/* Validate bo size is bit bigger than the request domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
@@ -941,7 +1044,8 @@ static const char * const amdgpu_vram_names[] = {
"GDDR6",
"DDR5",
"LPDDR4",
- "LPDDR5"
+ "LPDDR5",
+ "HBM3E"
};
/**
@@ -1148,7 +1252,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_resource *new_mem)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_resource *old_mem = bo->resource;
struct amdgpu_bo *abo;
@@ -1156,7 +1259,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
abo = ttm_to_amdgpu_bo(bo);
- amdgpu_vm_bo_invalidate(adev, abo, evict);
+ amdgpu_vm_bo_move(abo, new_mem, evict);
amdgpu_bo_kunmap(abo);
@@ -1169,58 +1272,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
old_mem ? old_mem->mem_type : -1);
}
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct ttm_resource *res = bo->tbo.resource;
- uint64_t size = amdgpu_bo_size(bo);
- struct drm_gem_object *obj;
- bool shared;
-
- /* Abort if the BO doesn't currently have a backing store */
- if (!res)
- return;
-
- obj = &bo->tbo.base;
- shared = drm_gem_object_is_shared_for_memory_stats(obj);
-
- switch (res->mem_type) {
- case TTM_PL_VRAM:
- stats->vram += size;
- if (amdgpu_res_cpu_visible(adev, res))
- stats->visible_vram += size;
- if (shared)
- stats->vram_shared += size;
- break;
- case TTM_PL_TT:
- stats->gtt += size;
- if (shared)
- stats->gtt_shared += size;
- break;
- case TTM_PL_SYSTEM:
- default:
- stats->cpu += size;
- if (shared)
- stats->cpu_shared += size;
- break;
- }
-
- if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) {
- stats->requested_vram += size;
- if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- stats->requested_visible_vram += size;
-
- if (res->mem_type != TTM_PL_VRAM) {
- stats->evicted_vram += size;
- if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- stats->evicted_visible_vram += size;
- }
- } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) {
- stats->requested_gtt += size;
- }
-}
-
/**
* amdgpu_bo_release_notify - notification about a BO being released
* @bo: pointer to a buffer object
@@ -1245,28 +1296,36 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (abo->kfd_bo)
amdgpu_amdkfd_release_notify(abo);
- /* We only remove the fence if the resv has individualized. */
- WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
- && bo->base.resv != &bo->base._resv);
- if (bo->base.resv == &bo->base._resv)
- amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+ /*
+ * We lock the private dma_resv object here and since the BO is about to
+ * be released nobody else should have a pointer to it.
+ * So when this locking here fails something is wrong with the reference
+ * counting.
+ */
+ if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv)))
+ return;
+
+ amdgpu_amdkfd_remove_all_eviction_fences(abo);
if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM ||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) ||
adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev)))
- return;
+ goto out;
- if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
- return;
+ r = dma_resv_reserve_fences(&bo->base._resv, 1);
+ if (r)
+ goto out;
- r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
- if (!WARN_ON(r)) {
- amdgpu_vram_mgr_set_cleared(bo->resource);
- amdgpu_bo_fence(abo, fence, false);
- dma_fence_put(fence);
- }
+ r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true);
+ if (WARN_ON(r))
+ goto out;
+
+ amdgpu_vram_mgr_set_cleared(bo->resource);
+ dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL);
+ dma_fence_put(fence);
- dma_resv_unlock(bo->base.resv);
+out:
+ dma_resv_unlock(&bo->base._resv);
}
/**
@@ -1436,6 +1495,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
}
/**
+ * amdgpu_bo_mem_stats_placement - bo placement for memory accounting
+ * @bo: the buffer object we should look at
+ *
+ * BO can have multiple preferred placements, to avoid double counting we want
+ * to file it under a single placement for memory stats.
+ * Luckily, if we take the highest set bit in preferred_domains the result is
+ * quite sensible.
+ *
+ * Returns:
+ * Which of the placements should the BO be accounted under.
+ */
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo)
+{
+ uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK;
+
+ if (!domain)
+ return TTM_PL_SYSTEM;
+
+ switch (rounddown_pow_of_two(domain)) {
+ case AMDGPU_GEM_DOMAIN_CPU:
+ return TTM_PL_SYSTEM;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ return TTM_PL_TT;
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ return TTM_PL_VRAM;
+ case AMDGPU_GEM_DOMAIN_GDS:
+ return AMDGPU_PL_GDS;
+ case AMDGPU_GEM_DOMAIN_GWS:
+ return AMDGPU_PL_GWS;
+ case AMDGPU_GEM_DOMAIN_OA:
+ return AMDGPU_PL_OA;
+ case AMDGPU_GEM_DOMAIN_DOORBELL:
+ return AMDGPU_PL_DOORBELL;
+ default:
+ return TTM_PL_SYSTEM;
+ }
+}
+
+/**
* amdgpu_bo_get_preferred_domain - get preferred domain
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
@@ -1547,7 +1645,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
-
+ /* Add the gem obj resv fence dump*/
+ if (dma_resv_trylock(bo->tbo.base.resv)) {
+ dma_resv_describe(bo->tbo.base.resv, m);
+ dma_resv_unlock(bo->tbo.base.resv);
+ }
seq_puts(m, "\n");
return size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 717e47b46167..375448627f7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -139,33 +139,6 @@ struct amdgpu_bo_vm {
struct amdgpu_vm_bo_base entries[];
};
-struct amdgpu_mem_stats {
- /* current VRAM usage, includes visible VRAM */
- uint64_t vram;
- /* current shared VRAM usage, includes visible VRAM */
- uint64_t vram_shared;
- /* current visible VRAM usage */
- uint64_t visible_vram;
- /* current GTT usage */
- uint64_t gtt;
- /* current shared GTT usage */
- uint64_t gtt_shared;
- /* current system memory usage */
- uint64_t cpu;
- /* current shared system memory usage */
- uint64_t cpu_shared;
- /* sum of evicted buffers, includes visible VRAM */
- uint64_t evicted_vram;
- /* sum of evicted buffers due to CPU access */
- uint64_t evicted_visible_vram;
- /* how much userspace asked for, includes vis.VRAM */
- uint64_t requested_vram;
- /* how much userspace asked for */
- uint64_t requested_visible_vram;
- /* how much userspace asked for */
- uint64_t requested_gtt;
-};
-
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
{
return container_of(tbo, struct amdgpu_bo, tbo);
@@ -287,6 +260,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
u64 *gpu_addr, void **cpu_addr);
+int amdgpu_bo_create_isp_user(struct amdgpu_device *adev,
+ struct dma_buf *dbuf, u32 domain,
+ struct amdgpu_bo **bo,
+ u64 *gpu_addr);
int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
uint64_t offset, uint64_t size,
struct amdgpu_bo **bo_ptr, void **cpu_addr);
@@ -298,6 +275,7 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev,
struct amdgpu_bo_vm **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
@@ -327,8 +305,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo);
-void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
- struct amdgpu_mem_stats *stats);
+uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo);
uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
uint32_t domain);
@@ -363,8 +340,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct drm_suballoc **sa_bo,
unsigned int size);
-void amdgpu_sa_bo_free(struct amdgpu_device *adev,
- struct drm_suballoc **sa_bo,
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo,
struct dma_fence *fence);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
index e8adfd0a570a..34b5e22b44e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -137,7 +137,8 @@ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
- device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0b28b2cf1517..c14f63cefe67 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -44,7 +44,7 @@
#include "amdgpu_securedisplay.h"
#include "amdgpu_atomfirmware.h"
-#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3)
+#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16)
static int psp_load_smu_fw(struct psp_context *psp);
static int psp_rap_terminate(struct psp_context *psp);
@@ -153,15 +153,18 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
+ case IP_VERSION(13, 0, 12):
+ ret = psp_init_ta_microcode(psp, ucode_prefix);
+ break;
default:
return -EINVAL;
}
return ret;
}
-static int psp_early_init(void *handle)
+static int psp_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
psp->autoload_supported = true;
@@ -193,6 +196,7 @@ static int psp_early_init(void *handle)
case IP_VERSION(11, 0, 9):
case IP_VERSION(11, 0, 11):
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(11, 0, 12):
case IP_VERSION(11, 0, 13):
psp_v11_0_set_psp_funcs(psp);
@@ -212,6 +216,11 @@ static int psp_early_init(void *handle)
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = false;
break;
+ case IP_VERSION(13, 0, 12):
+ psp_v13_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev);
+ break;
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
@@ -245,6 +254,10 @@ static int psp_early_init(void *handle)
case IP_VERSION(14, 0, 3):
psp_v14_0_set_psp_funcs(psp);
break;
+ case IP_VERSION(14, 0, 5):
+ psp_v14_0_set_psp_funcs(psp);
+ psp->boot_time_tmr = false;
+ break;
default:
return -EINVAL;
}
@@ -359,6 +372,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
int i;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
return false;
@@ -421,9 +435,9 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
return ret;
}
-static int psp_sw_init(void *handle)
+static int psp_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
int ret;
struct psp_runtime_boot_cfg_entry boot_cfg_entry;
@@ -527,11 +541,10 @@ failed1:
return ret;
}
-static int psp_sw_fini(void *handle)
+static int psp_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
- struct psp_gfx_cmd_resp *cmd = psp->cmd;
psp_memory_training_fini(psp);
@@ -541,8 +554,8 @@ static int psp_sw_fini(void *handle)
amdgpu_ucode_release(&psp->cap_fw);
amdgpu_ucode_release(&psp->toc_fw);
- kfree(cmd);
- cmd = NULL;
+ kfree(psp->cmd);
+ psp->cmd = NULL;
psp_free_shared_bufs(psp);
@@ -639,6 +652,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
return "AUTOLOAD_RLC";
case GFX_CMD_ID_BOOT_CFG:
return "BOOT_CFG";
+ case GFX_CMD_ID_CONFIG_SQ_PERFMON:
+ return "CONFIG_SQ_PERFMON";
default:
return "UNKNOWN CMD";
}
@@ -868,6 +883,7 @@ static bool psp_skip_tmr(struct psp_context *psp)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return true;
default:
@@ -1043,6 +1059,31 @@ static int psp_rl_load(struct amdgpu_device *adev)
return ret;
}
+int psp_memory_partition(struct psp_context *psp, int mode)
+{
+ struct psp_gfx_cmd_resp *cmd;
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_FB_NPS_MODE;
+ cmd->cmd.cmd_memory_part.mode = mode;
+
+ dev_info(psp->adev->dev,
+ "Requesting %d memory partition change through PSP", mode);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_err(psp->adev->dev,
+ "PSP request failed to change to NPS%d mode\n", mode);
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
int psp_spatial_partition(struct psp_context *psp, int mode)
{
struct psp_gfx_cmd_resp *cmd;
@@ -1756,34 +1797,47 @@ int psp_ras_initialize(struct psp_context *psp)
if (ret)
dev_warn(adev->dev, "PSP get boot config failed\n");
- if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) {
- if (!boot_cfg) {
- dev_info(adev->dev, "GECC is disabled\n");
- } else {
- /* disable GECC in next boot cycle if ras is
- * disabled by module parameter amdgpu_ras_enable
- * and/or amdgpu_ras_mask, or boot_config_get call
- * is failed
- */
- ret = psp_boot_config_set(adev, 0);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
- }
+ if (boot_cfg == 1 && !adev->ras_default_ecc_enabled &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n");
+ dev_warn(adev->dev,
+ "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n");
} else {
- if (boot_cfg == 1) {
- dev_info(adev->dev, "GECC is enabled\n");
+ if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ if (boot_cfg == 1) {
+ dev_info(adev->dev, "GECC is enabled\n");
+ } else {
+ /* enable GECC in next boot cycle if it is disabled
+ * in boot config, or force enable GECC if failed to
+ * get boot configuration
+ */
+ ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ }
} else {
- /* enable GECC in next boot cycle if it is disabled
- * in boot config, or force enable GECC if failed to
- * get boot configuration
- */
- ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC);
- if (ret)
- dev_warn(adev->dev, "PSP set boot config failed\n");
- else
- dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n");
+ if (!boot_cfg) {
+ if (!adev->ras_default_ecc_enabled &&
+ amdgpu_ras_enable != 1 &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n");
+ else
+ dev_info(adev->dev, "GECC is disabled\n");
+ } else {
+ /* disable GECC in next boot cycle if ras is
+ * disabled by module parameter amdgpu_ras_enable
+ * and/or amdgpu_ras_mask, or boot_config_get call
+ * is failed
+ */
+ ret = psp_boot_config_set(adev, 0);
+ if (ret)
+ dev_warn(adev->dev, "PSP set boot config failed\n");
+ else
+ dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n");
+ }
}
}
}
@@ -1807,6 +1861,10 @@ int psp_ras_initialize(struct psp_context *psp)
ras_cmd->ras_in_message.init_flags.xcc_mask =
adev->gfx.xcc_mask;
ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2;
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ ras_cmd->ras_in_message.init_flags.nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
ret = psp_ta_load(psp, &psp->ras_context.context);
@@ -2156,7 +2214,8 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
!psp->securedisplay_context.context.bin_desc.start_addr) {
- dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
+ dev_info(psp->adev->dev,
+ "SECUREDISPLAY: optional securedisplay ta ucode is not available\n");
return 0;
}
@@ -2234,7 +2293,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return -EINVAL;
if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA &&
- ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC &&
+ ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2)
return -EINVAL;
ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
@@ -2264,6 +2324,19 @@ bool amdgpu_psp_get_ras_capability(struct psp_context *psp)
}
}
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return false;
+
+ if (psp->funcs && psp->funcs->is_reload_needed)
+ return psp->funcs->is_reload_needed(psp);
+
+ return false;
+}
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -2342,6 +2415,15 @@ static int psp_hw_start(struct psp_context *psp)
}
}
+ if ((is_psp_fw_valid(psp->spdm_drv)) &&
+ (psp->funcs->bootloader_load_spdm_drv != NULL)) {
+ ret = psp_bootloader_load_spdm_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load spdm_drv failed!\n");
+ return ret;
+ }
+ }
+
if ((is_psp_fw_valid(psp->sos)) &&
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
@@ -2958,16 +3040,13 @@ failed:
return ret;
}
-static int psp_hw_init(void *handle)
+static int psp_hw_init(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
mutex_lock(&adev->firmware.mutex);
- /*
- * This sequence is just used on hw_init only once, no need on
- * resume.
- */
+
ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
@@ -2987,9 +3066,9 @@ failed:
return -EINVAL;
}
-static int psp_hw_fini(void *handle)
+static int psp_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (psp->ta_fw) {
@@ -3011,10 +3090,10 @@ static int psp_hw_fini(void *handle)
return 0;
}
-static int psp_suspend(void *handle)
+static int psp_suspend(struct amdgpu_ip_block *ip_block)
{
int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
@@ -3074,10 +3153,10 @@ out:
return ret;
}
-static int psp_resume(void *handle)
+static int psp_resume(struct amdgpu_ip_block *ip_block)
{
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct psp_context *psp = &adev->psp;
dev_info(adev->dev, "PSP is resuming...\n");
@@ -3092,6 +3171,10 @@ static int psp_resume(void *handle)
mutex_lock(&adev->firmware.mutex);
+ ret = amdgpu_ucode_init_bo(adev);
+ if (ret)
+ goto failed;
+
ret = psp_hw_start(psp);
if (ret)
goto failed;
@@ -3246,7 +3329,8 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_0 *asd_hdr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_asd.bin", chip_name);
if (err)
goto out;
@@ -3268,7 +3352,8 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name)
const struct psp_firmware_header_v1_0 *toc_hdr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_toc.bin", chip_name);
if (err)
goto out;
@@ -3364,6 +3449,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->ipkeymgr_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_SPDM_DRV:
+ psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->spdm_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
@@ -3431,7 +3522,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name)
uint8_t *ucode_array_start_addr;
int err = 0;
- err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sos.bin", chip_name);
if (err)
goto out;
@@ -3523,6 +3619,36 @@ out:
return err;
}
+static bool is_ta_fw_applicable(struct psp_context *psp,
+ const struct psp_fw_bin_desc *desc)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t fw_version;
+
+ switch (desc->fw_type) {
+ case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
+ /* for now, AUX TA only exists on 13.0.6 ta bin,
+ * from v20.00.0x.14
+ */
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) ==
+ IP_VERSION(13, 0, 6)) {
+ fw_version = le32_to_cpu(desc->fw_version);
+
+ if (adev->flags & AMD_IS_APU &&
+ (fw_version & 0xff) >= 0x14)
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX;
+ else
+ return desc->fw_type == TA_FW_TYPE_PSP_XGMI;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return true;
+}
+
static int parse_ta_bin_descriptor(struct psp_context *psp,
const struct psp_fw_bin_desc *desc,
const struct ta_firmware_header_v2_0 *ta_hdr)
@@ -3532,6 +3658,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
if (!psp || !desc || !ta_hdr)
return -EINVAL;
+ if (!is_ta_fw_applicable(psp, desc))
+ return 0;
+
ucode_start_addr = (uint8_t *)ta_hdr +
le32_to_cpu(desc->offset_bytes) +
le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
@@ -3544,6 +3673,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp,
psp->asd_context.bin_desc.start_addr = ucode_start_addr;
break;
case TA_FW_TYPE_PSP_XGMI:
+ case TA_FW_TYPE_PSP_XGMI_AUX:
psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version);
psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes);
psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr;
@@ -3673,7 +3803,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name)
struct amdgpu_device *adev = psp->adev;
int err;
- err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta_kicker.bin", chip_name);
+ else
+ err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_ta.bin", chip_name);
if (err)
return err;
@@ -3708,14 +3843,16 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL,
+ "amdgpu/%s_cap.bin", chip_name);
if (err) {
if (err == -ENODEV) {
dev_warn(adev->dev, "cap microcode does not exist, skip\n");
err = 0;
- goto out;
+ } else {
+ dev_err(adev->dev, "fail to initialize cap microcode\n");
}
- dev_err(adev->dev, "fail to initialize cap microcode\n");
+ goto out;
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP];
@@ -3736,13 +3873,49 @@ out:
return err;
}
-static int psp_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
+int psp_config_sq_perfmon(struct psp_context *psp,
+ uint32_t xcp_id, bool core_override_enable,
+ bool reg_override_enable, bool perfmon_override_enable)
+{
+ int ret;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ if (xcp_id > MAX_XCP) {
+ dev_err(psp->adev->dev, "invalid xcp_id %d\n", xcp_id);
+ return -EINVAL;
+ }
+
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) {
+ dev_err(psp->adev->dev, "Unsupported MP0 version 0x%x for CONFIG_SQ_PERFMON command\n",
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0));
+ return -EINVAL;
+ }
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ cmd->cmd_id = GFX_CMD_ID_CONFIG_SQ_PERFMON;
+ cmd->cmd.config_sq_perfmon.gfx_xcp_mask = BIT_MASK(xcp_id);
+ cmd->cmd.config_sq_perfmon.core_override = core_override_enable;
+ cmd->cmd.config_sq_perfmon.reg_override = reg_override_enable;
+ cmd->cmd.config_sq_perfmon.perfmon_override = perfmon_override_enable;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+ if (ret)
+ dev_warn(psp->adev->dev, "PSP failed to config sq: xcp%d core%d reg%d perfmon%d\n",
+ xcp_id, core_override_enable, reg_override_enable, perfmon_override_enable);
+
+ release_psp_cmd_buf(psp);
+ return ret;
+}
+
+static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
return 0;
}
-static int psp_set_powergating_state(void *handle,
+static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3754,10 +3927,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
dev_info(adev->dev, "PSP block is not ready yet\n.");
return -EBUSY;
}
@@ -3786,8 +3961,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
struct amdgpu_bo *fw_buf_bo = NULL;
uint64_t fw_pri_mc_addr;
void *fw_pri_cpu_addr;
+ struct amdgpu_ip_block *ip_block;
- if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
+ if (!ip_block || !ip_block->status.late_initialized) {
dev_err(adev->dev, "PSP block is not ready yet.");
return -EBUSY;
}
@@ -3795,7 +3972,8 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
if (!drm_dev_enter(ddev, &idx))
return -ENODEV;
- ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf);
+ ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s", buf);
if (ret)
goto fail;
@@ -3856,7 +4034,7 @@ int is_psp_fw_valid(struct psp_bin_desc bin)
}
static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buffer, loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -3892,7 +4070,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj,
}
static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buffer,
+ const struct bin_attribute *bin_attr, char *buffer,
loff_t pos, size_t count)
{
struct device *dev = kobj_to_dev(kobj);
@@ -3944,11 +4122,11 @@ rel_buf:
* Writing to this file will stage an IFWI for update. Reading from this file
* will trigger the update process.
*/
-static struct bin_attribute psp_vbflash_bin_attr = {
+static const struct bin_attribute psp_vbflash_bin_attr = {
.attr = {.name = "psp_vbflash", .mode = 0660},
.size = 0,
- .write = amdgpu_psp_vbflash_write,
- .read = amdgpu_psp_vbflash_read,
+ .write_new = amdgpu_psp_vbflash_write,
+ .read_new = amdgpu_psp_vbflash_read,
};
/**
@@ -3975,7 +4153,7 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev,
}
static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL);
-static struct bin_attribute *bin_flash_attrs[] = {
+static const struct bin_attribute *const bin_flash_attrs[] = {
&psp_vbflash_bin_attr,
NULL
};
@@ -3999,7 +4177,7 @@ static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribu
}
static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
- struct bin_attribute *attr,
+ const struct bin_attribute *attr,
int idx)
{
struct device *dev = kobj_to_dev(kobj);
@@ -4011,25 +4189,124 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj,
const struct attribute_group amdgpu_flash_attr_group = {
.attrs = flash_attrs,
- .bin_attrs = bin_flash_attrs,
+ .bin_attrs_new = bin_flash_attrs,
.is_bin_visible = amdgpu_bin_flash_attr_is_visible,
.is_visible = amdgpu_flash_attr_is_visible,
};
+#if defined(CONFIG_DEBUG_FS)
+static int psp_read_spirom_debugfs_open(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet;
+ int ret;
+
+ /* serialize the open() file calling */
+ if (!mutex_trylock(&adev->psp.mutex))
+ return -EBUSY;
+
+ /*
+ * make sure only one userpace process is alive for dumping so that
+ * only one memory buffer of AMD_VBIOS_FILE_MAX_SIZE * 2 is consumed.
+ * let's say the case where one process try opening the file while
+ * another one has proceeded to read or release. In this way, eliminate
+ * the use of mutex for read() or release() callback as well.
+ */
+ if (adev->psp.spirom_dump_trip) {
+ mutex_unlock(&adev->psp.mutex);
+ return -EBUSY;
+ }
+
+ bo_triplet = kzalloc(sizeof(struct spirom_bo), GFP_KERNEL);
+ if (!bo_triplet) {
+ mutex_unlock(&adev->psp.mutex);
+ return -ENOMEM;
+ }
+
+ ret = amdgpu_bo_create_kernel(adev, AMD_VBIOS_FILE_MAX_SIZE_B * 2,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &bo_triplet->bo,
+ &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ if (ret)
+ goto rel_trip;
+
+ ret = psp_dump_spirom(&adev->psp, bo_triplet->mc_addr);
+ if (ret)
+ goto rel_bo;
+
+ adev->psp.spirom_dump_trip = bo_triplet;
+ mutex_unlock(&adev->psp.mutex);
+ return 0;
+rel_bo:
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+rel_trip:
+ kfree(bo_triplet);
+ mutex_unlock(&adev->psp.mutex);
+ dev_err(adev->dev, "Trying IFWI dump fails, err = %d\n", ret);
+ return ret;
+}
+
+static ssize_t psp_read_spirom_debugfs_read(struct file *filp, char __user *buf, size_t size,
+ loff_t *pos)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (!bo_triplet)
+ return -EINVAL;
+
+ return simple_read_from_buffer(buf,
+ size,
+ pos, bo_triplet->cpu_addr,
+ AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+}
+
+static int psp_read_spirom_debugfs_release(struct inode *inode, struct file *filp)
+{
+ struct amdgpu_device *adev = filp->f_inode->i_private;
+ struct spirom_bo *bo_triplet = adev->psp.spirom_dump_trip;
+
+ if (bo_triplet) {
+ amdgpu_bo_free_kernel(&bo_triplet->bo, &bo_triplet->mc_addr,
+ &bo_triplet->cpu_addr);
+ kfree(bo_triplet);
+ }
+
+ adev->psp.spirom_dump_trip = NULL;
+ return 0;
+}
+
+static const struct file_operations psp_dump_spirom_debugfs_ops = {
+ .owner = THIS_MODULE,
+ .open = psp_read_spirom_debugfs_open,
+ .read = psp_read_spirom_debugfs_read,
+ .release = psp_read_spirom_debugfs_release,
+ .llseek = default_llseek,
+};
+#endif
+
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ debugfs_create_file_size("psp_spirom_dump", 0444, minor->debugfs_root,
+ adev, &psp_dump_spirom_debugfs_ops, AMD_VBIOS_FILE_MAX_SIZE_B * 2);
+#endif
+}
+
const struct amd_ip_funcs psp_ip_funcs = {
.name = "psp",
.early_init = psp_early_init,
- .late_init = NULL,
.sw_init = psp_sw_init,
.sw_fini = psp_sw_fini,
.hw_init = psp_hw_init,
.hw_fini = psp_hw_fini,
.suspend = psp_suspend,
.resume = psp_resume,
- .is_idle = NULL,
- .check_soft_reset = NULL,
- .wait_for_idle = NULL,
- .soft_reset = NULL,
.set_clockgating_state = psp_set_clockgating_state,
.set_powergating_state = psp_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e8abbbcb4326..428adc7f741d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -39,6 +39,18 @@
#define PSP_TMR_ALIGNMENT 0x100000
#define PSP_FW_NAME_LEN 0x24
+/* VBIOS gfl defines */
+#define MBOX_READY_MASK 0x80000000
+#define MBOX_STATUS_MASK 0x0000FFFF
+#define MBOX_COMMAND_MASK 0x00FF0000
+#define MBOX_READY_FLAG 0x80000000
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
+#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
+#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO 0xf
+#define C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI 0x10
+#define C2PMSG_CMD_SPI_GET_FLASH_IMAGE 0x11
+
extern const struct attribute_group amdgpu_flash_attr_group;
enum psp_shared_mem_size {
@@ -80,6 +92,7 @@ enum psp_bootloader_cmd {
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
+ PSP_BL__LOAD_SPDMDRV = 0x20000000,
};
enum psp_ring_type {
@@ -106,6 +119,7 @@ enum psp_reg_prog_id {
PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_MMHUB_L1_TLB_CNTL = 25,
PSP_REG_LAST
};
@@ -120,6 +134,7 @@ struct psp_funcs {
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
int (*bootloader_load_ras_drv)(struct psp_context *psp);
int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
+ int (*bootloader_load_spdm_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
@@ -135,10 +150,14 @@ struct psp_funcs {
int (*load_usbc_pd_fw)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver);
int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
+ int (*dump_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
int (*vbflash_stat)(struct psp_context *psp);
int (*fatal_error_recovery_quirk)(struct psp_context *psp);
bool (*get_ras_capability)(struct psp_context *psp);
bool (*is_aux_sos_load_required)(struct psp_context *psp);
+ bool (*is_reload_needed)(struct psp_context *psp);
+ int (*reg_program_no_ring)(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
};
struct ta_funcs {
@@ -316,6 +335,14 @@ struct psp_runtime_scpm_entry {
enum psp_runtime_scpm_authentication scpm_status;
};
+#if defined(CONFIG_DEBUG_FS)
+struct spirom_bo {
+ struct amdgpu_bo *bo;
+ uint64_t mc_addr;
+ void *cpu_addr;
+};
+#endif
+
struct psp_context {
struct amdgpu_device *adev;
struct psp_ring km_ring;
@@ -342,6 +369,7 @@ struct psp_context {
struct psp_bin_desc dbg_drv;
struct psp_bin_desc ras_drv;
struct psp_bin_desc ipkeymgr_drv;
+ struct psp_bin_desc spdm_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -402,6 +430,9 @@ struct psp_context {
char *vbflash_tmp_buf;
size_t vbflash_image_size;
bool vbflash_done;
+#if defined(CONFIG_DEBUG_FS)
+ struct spirom_bo *spirom_dump_trip;
+#endif
};
struct amdgpu_psp_funcs {
@@ -433,6 +464,9 @@ struct amdgpu_psp_funcs {
#define psp_bootloader_load_ipkeymgr_drv(psp) \
((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
(psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
+#define psp_bootloader_load_spdm_drv(psp) \
+ ((psp)->funcs->bootloader_load_spdm_drv ? \
+ (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
@@ -457,6 +491,10 @@ struct amdgpu_psp_funcs {
((psp)->funcs->update_spirom ? \
(psp)->funcs->update_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+#define psp_dump_spirom(psp, fw_pri_mc_addr) \
+ ((psp)->funcs->dump_spirom ? \
+ (psp)->funcs->dump_spirom((psp), fw_pri_mc_addr) : -EINVAL)
+
#define psp_vbflash_status(psp) \
((psp)->funcs->vbflash_stat ? \
(psp)->funcs->vbflash_stat((psp)) : -EINVAL)
@@ -468,6 +506,10 @@ struct amdgpu_psp_funcs {
#define psp_is_aux_sos_load_required(psp) \
((psp)->funcs->is_aux_sos_load_required ? (psp)->funcs->is_aux_sos_load_required((psp)) : 0)
+#define psp_reg_program_no_ring(psp, val, id) \
+ ((psp)->funcs->reg_program_no_ring ? \
+ (psp)->funcs->reg_program_no_ring((psp), val, id) : -EINVAL)
+
extern const struct amd_ip_funcs psp_ip_funcs;
extern const struct amdgpu_ip_block_version psp_v3_1_ip_block;
@@ -552,9 +594,19 @@ int psp_load_fw_list(struct psp_context *psp,
void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size);
int psp_spatial_partition(struct psp_context *psp, int mode);
+int psp_memory_partition(struct psp_context *psp, int mode);
int is_psp_fw_valid(struct psp_bin_desc bin);
int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
bool amdgpu_psp_get_ras_capability(struct psp_context *psp);
+
+int psp_config_sq_perfmon(struct psp_context *psp, uint32_t xcp_id,
+ bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable);
+bool amdgpu_psp_tos_reload_needed(struct amdgpu_device *adev);
+int amdgpu_psp_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id);
+void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
+
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1a1395c5fff1..de0944947eaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -36,6 +36,7 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "nbio_v4_3.h"
+#include "nbif_v6_3_1.h"
#include "nbio_v7_9.h"
#include "atom.h"
#include "amdgpu_reset.h"
@@ -76,6 +77,7 @@ const char *ras_block_string[] = {
"jpeg",
"ih",
"mpio",
+ "mmsch",
};
const char *ras_mca_block_string[] = {
@@ -192,7 +194,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
@@ -1214,6 +1216,42 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
}
}
+static void amdgpu_ras_virt_error_generate_report(struct amdgpu_device *adev,
+ struct ras_query_if *query_if,
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
+{
+ unsigned long new_ue, new_ce, new_de;
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &query_if->head);
+ const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->evid.event_id;
+
+ new_ce = err_data->ce_count - obj->err_data.ce_count;
+ new_ue = err_data->ue_count - obj->err_data.ue_count;
+ new_de = err_data->de_count - obj->err_data.de_count;
+
+ if (new_ce) {
+ RAS_EVENT_LOG(adev, event_id, "%lu correctable hardware errors "
+ "detected in %s block\n",
+ new_ce,
+ blk_name);
+ }
+
+ if (new_ue) {
+ RAS_EVENT_LOG(adev, event_id, "%lu uncorrectable hardware errors "
+ "detected in %s block\n",
+ new_ue,
+ blk_name);
+ }
+
+ if (new_de) {
+ RAS_EVENT_LOG(adev, event_id, "%lu deferred hardware errors "
+ "detected in %s block\n",
+ new_de,
+ blk_name);
+ }
+}
+
static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data)
{
struct ras_err_node *err_node;
@@ -1237,6 +1275,15 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
}
}
+static void amdgpu_ras_mgr_virt_error_data_statistics_update(struct ras_manager *obj,
+ struct ras_err_data *err_data)
+{
+ /* Host reports absolute counts */
+ obj->err_data.ue_count = err_data->ue_count;
+ obj->err_data.ce_count = err_data->ce_count;
+ obj->err_data.de_count = err_data->de_count;
+}
+
static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
{
struct ras_common_if head;
@@ -1253,7 +1300,7 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
struct ras_manager *obj;
/* in resume phase, no need to create aca fs node */
- if (adev->in_suspend || amdgpu_in_reset(adev))
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
return 0;
obj = get_ras_manager(adev, blk);
@@ -1323,7 +1370,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
return -EINVAL;
- if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
+ if (error_query_mode == AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ return amdgpu_virt_req_ras_err_count(adev, blk, err_data);
+ } else if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) {
if (info->head.block == AMDGPU_RAS_BLOCK__UMC) {
amdgpu_ras_get_ecc_info(adev, err_data);
} else {
@@ -1405,14 +1454,22 @@ static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev,
if (ret)
goto out_fini_err_data;
- amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+ if (error_query_mode != AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY) {
+ amdgpu_rasmgr_error_data_statistic_update(obj, &err_data);
+ amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
+ } else {
+ /* Host provides absolute error counts. First generate the report
+ * using the previous VF internal count against new host count.
+ * Then Update VF internal count.
+ */
+ amdgpu_ras_virt_error_generate_report(adev, info, &err_data, &qctx);
+ amdgpu_ras_mgr_virt_error_data_statistics_update(obj, &err_data);
+ }
info->ue_count = obj->err_data.ue_count;
info->ce_count = obj->err_data.ce_count;
info->de_count = obj->err_data.de_count;
- amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
-
out_fini_err_data:
amdgpu_ras_error_data_fini(&err_data);
@@ -1441,6 +1498,9 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
!amdgpu_ras_get_aca_debug_mode(adev))
return -EOPNOTSUPP;
+ if (amdgpu_sriov_vf(adev))
+ return -EOPNOTSUPP;
+
/* skip ras error reset in gpu reset */
if ((amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) &&
((smu_funcs && smu_funcs->set_debug_mode) ||
@@ -1677,7 +1737,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
- struct kobject *kobj, struct bin_attribute *attr,
+ struct kobject *kobj, const struct bin_attribute *attr,
char *buf, loff_t ppos, size_t count)
{
struct amdgpu_ras *con =
@@ -1808,6 +1868,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
if (!obj || obj->attr_inuse)
return -EINVAL;
+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
get_obj(obj);
snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
@@ -1960,6 +2023,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = true;
break;
@@ -2008,8 +2072,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
/* debugfs end */
/* ras fs */
-static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
- amdgpu_ras_sysfs_badpages_read, NULL, 0);
+static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
+ amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
amdgpu_ras_sysfs_features_read, NULL);
static DEVICE_ATTR(version, 0444,
@@ -2031,7 +2095,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
&con->event_state_attr.attr,
NULL
};
- struct bin_attribute *bin_attrs[] = {
+ const struct bin_attribute *bin_attrs[] = {
NULL,
NULL,
};
@@ -2057,11 +2121,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */
- bin_attr_gpu_vram_bad_pages.private = NULL;
con->badpages_attr = bin_attr_gpu_vram_bad_pages;
+ sysfs_bin_attr_init(&con->badpages_attr);
bin_attrs[0] = &con->badpages_attr;
- group.bin_attrs = bin_attrs;
- sysfs_bin_attr_init(bin_attrs[0]);
+ group.bin_attrs_new = bin_attrs;
}
r = sysfs_create_group(&adev->dev->kobj, &group);
@@ -2101,6 +2164,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
/* Fatal error events are handled on host side */
if (amdgpu_sriov_vf(adev))
return;
+ /*
+ * If the current interrupt is caused by a non-fatal RAS error, skip
+ * check for fatal error. For fatal errors, FED status of all devices
+ * in XGMI hive gets set when the first device gets fatal error
+ * interrupt. The error gets propagated to other devices as well, so
+ * make sure to ack the interrupt regardless of FED status.
+ */
+ if (!amdgpu_ras_get_fed_status(adev) &&
+ amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY))
+ return;
if (adev->nbio.ras &&
adev->nbio.ras->handle_ras_controller_intr_no_bifring)
@@ -2130,6 +2203,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
if (ret)
return;
+ amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block);
/* both query_poison_status and handle_poison_consumption are optional,
* but at least one of them should be implemented if we need poison
* consumption handler
@@ -2605,6 +2679,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
reset_context.src = AMDGPU_RESET_SRC_RAS;
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
/* Perform full reset in fatal error mode */
if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
@@ -2661,41 +2736,248 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
return 0;
}
-/* it deal with vram only. */
-int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages)
+static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct ras_err_handler_data *data;
+ struct ta_ras_query_address_input addr_in;
+ uint32_t socket = 0;
int ret = 0;
- uint32_t i;
- if (!con || !con->eh_data || !bps || pages <= 0)
- return 0;
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
- mutex_lock(&con->recovery_lock);
- data = con->eh_data;
- if (!data)
- goto out;
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.socket_id = socket;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ /* tell RAS TA the node instance is not used */
+ addr_in.ma.node_inst = TA_RAS_INV_NODE;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ ret = adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+
+ return ret;
+}
+
+static int amdgpu_ras_mca2pa(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps,
+ struct ras_err_data *err_data)
+{
+ struct ta_ras_query_address_input addr_in;
+ uint32_t die_id, socket = 0;
- for (i = 0; i < pages; i++) {
+ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
+ socket = adev->smuio.funcs->get_socket_id(adev);
+
+ /* although die id is gotten from PA in nps1 mode, the id is
+ * fitable for any nps mode
+ */
+ if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa)
+ die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT);
+ else
+ return -EINVAL;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = bps->address;
+ addr_in.ma.ch_inst = bps->mem_channel;
+ addr_in.ma.umc_inst = bps->mcumc_id;
+ addr_in.ma.node_inst = die_id;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data,
+ &addr_in, NULL, false);
+ else
+ return -EINVAL;
+}
+
+static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int count)
+{
+ int j;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data = con->eh_data;
+
+ for (j = 0; j < count; j++) {
if (amdgpu_ras_check_bad_page_unlock(con,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT))
continue;
if (!data->space_left &&
- amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
- ret = -ENOMEM;
- goto out;
+ amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
+ return -ENOMEM;
}
- amdgpu_ras_reserve_page(adev, bps[i].retired_page);
+ amdgpu_ras_reserve_page(adev, bps[j].retired_page);
- memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+ memcpy(&data->bps[data->count], &(bps[j]),
+ sizeof(struct eeprom_table_record));
data->count++;
data->space_left--;
}
+
+ return 0;
+}
+
+static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ int i = 0;
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+
+ /*old asics just have pa in eeprom*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ goto out;
+ }
+
+ for (i = 0; i < adev->umc.retire_unit; i++)
+ bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps) {
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ } else {
+ if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data))
+ return -EINVAL;
+ }
+ } else {
+ if (bps[0].address == 0) {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps[0].address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+ }
+
+ if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) {
+ if (nps == AMDGPU_NPS1_PARTITION_MODE)
+ memcpy(err_data->err_addr, bps,
+ sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
+ else
+ return -EOPNOTSUPP;
+ }
+ }
+
out:
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
+}
+
+static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, struct ras_err_data *err_data,
+ enum amdgpu_memory_partition nps)
+{
+ enum amdgpu_memory_partition save_nps;
+
+ save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK;
+ bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
+
+ if (save_nps == nps) {
+ if (amdgpu_umc_pages_in_a_row(adev, err_data,
+ bps->retired_page << AMDGPU_GPU_PAGE_SHIFT))
+ return -EINVAL;
+ } else {
+ if (bps->address) {
+ if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data))
+ return -EINVAL;
+ } else {
+ /* for specific old eeprom data, mca address is not stored,
+ * calc it from pa
+ */
+ if (amdgpu_umc_pa2mca(adev, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT,
+ &(bps->address), AMDGPU_NPS1_PARTITION_MODE))
+ return -EINVAL;
+
+ if (amdgpu_ras_mca2pa(adev, bps, err_data))
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr,
+ adev->umc.retire_unit);
+}
+
+/* it deal with vram only. */
+int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
+ struct eeprom_table_record *bps, int pages, bool from_rom)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_data err_data;
+ struct amdgpu_ras_eeprom_control *control =
+ &adev->psp.ras_context.ras->eeprom_control;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ int ret = 0;
+ uint32_t i = 0;
+
+ if (!con || !con->eh_data || !bps || pages <= 0)
+ return 0;
+
+ if (from_rom) {
+ err_data.err_addr =
+ kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n");
+ return -ENOMEM;
+ }
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ }
+
+ mutex_lock(&con->recovery_lock);
+
+ if (from_rom) {
+ /* there is no pa recs in V3, so skip pa recs processing */
+ if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ for (i = 0; i < pages; i++) {
+ if (control->ras_num_recs - i >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ /* deal with retire_unit records a time */
+ ret = __amdgpu_ras_convert_rec_array_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ control->ras_num_bad_pages -= adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ for (; i < pages; i++) {
+ ret = __amdgpu_ras_convert_rec_from_rom(adev,
+ &bps[i], &err_data, nps);
+ if (ret)
+ control->ras_num_bad_pages -= adev->umc.retire_unit;
+ }
+ } else {
+ ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages);
+ }
+
+ if (from_rom)
+ kfree(err_data.err_addr);
mutex_unlock(&con->recovery_lock);
return ret;
@@ -2712,7 +2994,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data *data;
struct amdgpu_ras_eeprom_control *control;
- int save_count;
+ int save_count, unit_num, bad_page_num, i;
if (!con || !con->eh_data) {
if (new_cnt)
@@ -2724,19 +3006,32 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
mutex_lock(&con->recovery_lock);
control = &con->eeprom_control;
data = con->eh_data;
- save_count = data->count - control->ras_num_recs;
+ bad_page_num = control->ras_num_bad_pages;
+ save_count = data->count - bad_page_num;
mutex_unlock(&con->recovery_lock);
+ unit_num = save_count / adev->umc.retire_unit;
if (new_cnt)
- *new_cnt = save_count / adev->umc.retire_unit;
+ *new_cnt = unit_num;
/* only new entries are saved */
if (save_count > 0) {
- if (amdgpu_ras_eeprom_append(control,
- &data->bps[control->ras_num_recs],
- save_count)) {
- dev_err(adev->dev, "Failed to save EEPROM table data!");
- return -EIO;
+ /*old asics only save pa to eeprom like before*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[bad_page_num], save_count)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ } else {
+ for (i = 0; i < unit_num; i++) {
+ if (amdgpu_ras_eeprom_append(control,
+ &data->bps[bad_page_num +
+ i * adev->umc.retire_unit], 1)) {
+ dev_err(adev->dev, "Failed to save EEPROM table data!");
+ return -EIO;
+ }
+ }
}
dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
@@ -2754,7 +3049,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
struct amdgpu_ras_eeprom_control *control =
&adev->psp.ras_context.ras->eeprom_control;
struct eeprom_table_record *bps;
- int ret;
+ int ret, i = 0;
/* no bad page record, skip eeprom access */
if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0)
@@ -2765,11 +3060,49 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
return -ENOMEM;
ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs);
- if (ret)
+ if (ret) {
dev_err(adev->dev, "Failed to load EEPROM table records!");
- else
- ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs);
+ } else {
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ /*In V3, there is no pa recs, and some cases(when address==0) may be parsed
+ as pa recs, so add verion check to avoid it.
+ */
+ if (control->tbl_hdr.version < RAS_TABLE_VER_V3) {
+ for (i = 0; i < control->ras_num_recs; i++) {
+ if ((control->ras_num_recs - i) >= adev->umc.retire_unit) {
+ if ((bps[i].address == bps[i + 1].address) &&
+ (bps[i].mem_channel == bps[i + 1].mem_channel)) {
+ control->ras_num_pa_recs += adev->umc.retire_unit;
+ i += (adev->umc.retire_unit - 1);
+ } else {
+ control->ras_num_mca_recs +=
+ (control->ras_num_recs - i);
+ break;
+ }
+ } else {
+ control->ras_num_mca_recs += (control->ras_num_recs - i);
+ break;
+ }
+ }
+ } else {
+ control->ras_num_mca_recs = control->ras_num_recs;
+ }
+ }
+ ret = amdgpu_ras_eeprom_check(control);
+ if (ret)
+ goto out;
+
+ /* HW not usable */
+ if (amdgpu_ras_is_rma(adev)) {
+ ret = -EHWPOISON;
+ goto out;
+ }
+
+ ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true);
+ }
+
+out:
kfree(bps);
return ret;
}
@@ -2814,31 +3147,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
- * scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -2,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to config
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages
+ * which is intended for debugging purpose.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when threhold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM,
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
-
- if (amdgpu_bad_page_threshold < 0) {
+ if (amdgpu_bad_page_threshold == -2) {
u64 val = adev->gmc.mc_vram_size;
do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val),
max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
con->bad_page_cnt_threshold = min_t(int, max_count,
amdgpu_bad_page_threshold);
@@ -3146,7 +3477,53 @@ static int amdgpu_ras_page_retirement_thread(void *param)
return 0;
}
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_ras_eeprom_control *control;
+ int ret;
+
+ if (!con || amdgpu_sriov_vf(adev))
+ return 0;
+
+ control = &con->eeprom_control;
+ ret = amdgpu_ras_eeprom_init(control);
+ if (ret)
+ return ret;
+
+ if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+ control->ras_num_pa_recs = control->ras_num_recs;
+
+ if (adev->umc.ras &&
+ adev->umc.ras->get_retire_flip_bits)
+ adev->umc.ras->get_retire_flip_bits(adev);
+
+ if (control->ras_num_recs) {
+ ret = amdgpu_ras_load_bad_pages(adev);
+ if (ret)
+ return ret;
+
+ amdgpu_dpm_send_hbm_bad_pages_num(
+ adev, control->ras_num_bad_pages);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(
+ adev, control->bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
+
+ /* The format action is only applied to new ASICs */
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+ control->tbl_hdr.version < RAS_TABLE_VER_V3)
+ if (!amdgpu_ras_eeprom_reset_table(control))
+ if (amdgpu_ras_save_bad_pages(adev, NULL))
+ dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
+ }
+
+ return ret;
+}
+
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
@@ -3181,31 +3558,10 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(&con->eeprom_control);
amdgpu_ras_validate_threshold(adev, max_eeprom_records_count);
- /* Todo: During test the SMU might fail to read the eeprom through I2C
- * when the GPU is pending on XGMI reset during probe time
- * (Mostly after second bus reset), skip it now
- */
- if (adev->gmc.xgmi.pending_reset)
- return 0;
- ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
- /*
- * This calling fails when is_rma is true or
- * ret != 0.
- */
- if (amdgpu_ras_is_rma(adev) || ret)
- goto free;
-
- if (con->eeprom_control.ras_num_recs) {
- ret = amdgpu_ras_load_bad_pages(adev);
+ if (init_bp_info) {
+ ret = amdgpu_ras_init_badpage_info(adev);
if (ret)
goto free;
-
- amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
-
- if (con->update_channel_flag == true) {
- amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
- con->update_channel_flag = false;
- }
}
mutex_init(&con->page_rsv_lock);
@@ -3296,6 +3652,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return true;
default:
@@ -3308,7 +3665,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(14, 0, 3):
return true;
default:
return false;
@@ -3370,7 +3729,8 @@ static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev
*/
if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) ||
amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) ||
- amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3))
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(5, 0, 1))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
1 << AMDGPU_RAS_BLOCK__JPEG);
else
@@ -3438,6 +3798,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
if (!amdgpu_ras_asic_supported(adev))
return;
+ if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_virt_get_ras_capability(adev))
+ goto init_ras_enabled_flag;
+ }
+
/* query ras capability from psp */
if (amdgpu_psp_get_ras_capability(&adev->psp))
goto init_ras_enabled_flag;
@@ -3466,8 +3831,13 @@ init_ras_enabled_flag:
adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
adev->ras_hw_enabled & amdgpu_ras_mask;
- /* aca is disabled by default */
- adev->aca.is_enabled = false;
+ /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->aca.is_enabled =
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14));
+ }
/* bad page feature is not applicable to specific app platform */
if (adev->gmc.is_app_apu &&
@@ -3535,7 +3905,7 @@ static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
/* init event manager with node 0 on xgmi system */
- if (!amdgpu_in_reset(adev)) {
+ if (!amdgpu_reset_in_recovery(adev)) {
if (!hive || adev->gmc.xgmi.node_id == 0)
ras_event_mgr_init(ras->event_mgr);
}
@@ -3554,8 +3924,11 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
case IP_VERSION(13, 0, 14):
- con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
break;
default:
break;
@@ -3629,7 +4002,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
* check DF RAS */
adev->nbio.ras = &nbio_v4_3_ras;
break;
+ case IP_VERSION(6, 3, 1):
+ if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF))
+ /* unlike other generation of nbio ras,
+ * nbif v6_3_1 only support fatal error interrupt
+ * to inform software that DF is freezed due to
+ * system fatal error event. driver should not
+ * enable nbio ras in such case. Instead,
+ * check DF RAS
+ */
+ adev->nbio.ras = &nbif_v6_3_1_ras;
+ break;
case IP_VERSION(7, 9, 0):
+ case IP_VERSION(7, 9, 1):
if (!adev->gmc.is_app_apu)
adev->nbio.ras = &nbio_v7_9_ras;
break;
@@ -3750,7 +4135,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
if (r) {
- if (adev->in_suspend || amdgpu_in_reset(adev)) {
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev)) {
/* in resume phase, if fail to enable ras,
* clean up all ras fs nodes, and disable ras */
goto cleanup;
@@ -3762,7 +4147,7 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
amdgpu_persistent_edc_harvesting(adev, ras_block);
/* in resume phase, no need to create ras fs node */
- if (adev->in_suspend || amdgpu_in_reset(adev))
+ if (adev->in_suspend || amdgpu_reset_in_recovery(adev))
return 0;
ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
@@ -3892,7 +4277,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
amdgpu_ras_event_mgr_init(adev);
if (amdgpu_ras_aca_is_supported(adev)) {
- if (amdgpu_in_reset(adev)) {
+ if (amdgpu_reset_in_recovery(adev)) {
if (amdgpu_aca_is_enabled(adev))
r = amdgpu_aca_reset(adev);
else
@@ -3910,7 +4295,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
}
/* Guest side doesn't need init ras feature */
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_telemetry_en(adev))
return 0;
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
@@ -4008,7 +4393,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
if (!ras)
return false;
- return atomic_read(&ras->fed);
+ return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
}
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
@@ -4016,8 +4401,48 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
struct amdgpu_ras *ras;
ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (status)
+ set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ else
+ clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state);
+ }
+}
+
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ ras->ras_err_state = 0;
+}
+
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
if (ras)
- atomic_set(&ras->fed, !!status);
+ set_bit(block, &ras->ras_err_state);
+}
+
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras) {
+ if (block == AMDGPU_RAS_BLOCK__ANY)
+ return (ras->ras_err_state != 0);
+ else
+ return test_bit(block, &ras->ras_err_state) ||
+ test_bit(AMDGPU_RAS_BLOCK__LAST,
+ &ras->ras_err_state);
+ }
+
+ return false;
}
static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev)
@@ -4095,8 +4520,11 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
enum ras_event_type type = RAS_EVENT_TYPE_FATAL;
u64 event_id;
- if (amdgpu_ras_mark_ras_event(adev, type))
+ if (amdgpu_ras_mark_ras_event(adev, type)) {
+ dev_err(adev->dev,
+ "uncorrectable hardware error (ERREVENT_ATHUB_INTERRUPT) detected!\n");
return;
+ }
event_id = amdgpu_ras_acquire_event_id(adev, type);
@@ -4294,8 +4722,27 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
}
- if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
+ if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+ int hive_ras_recovery = 0;
+
+ if (hive) {
+ hive_ras_recovery = atomic_read(&hive->ras_recovery);
+ amdgpu_put_xgmi_hive(hive);
+ }
+ /* In the case of multiple GPUs, after a GPU has started
+ * resetting all GPUs on hive, other GPUs do not need to
+ * trigger GPU reset again.
+ */
+ if (!hive_ras_recovery)
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ else
+ atomic_set(&ras->in_recovery, 0);
+ } else {
+ flush_work(&ras->recovery_work);
amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
+ }
+
return 0;
}
@@ -4358,11 +4805,14 @@ bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev,
return false;
}
- if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode))
+ if (amdgpu_sriov_vf(adev)) {
+ *error_query_mode = AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY;
+ } else if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) {
*error_query_mode =
(con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY;
- else
+ } else {
*error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY;
+ }
return true;
}
@@ -4759,9 +5209,9 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
"socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
socket_id, aid_id, fw_status);
- if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error))
+ if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error))
dev_info(adev->dev,
- "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n",
+ "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n",
socket_id, aid_id, fw_status);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 669720a9c60a..927d6bff734a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -47,7 +47,7 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29)
-#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30)
#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
@@ -65,7 +65,7 @@ struct amdgpu_iv_entry;
/* Reserve 8 physical dram row for possible retirement.
* In worst cases, it will lose 8 * 2MB memory in vram domain */
-#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20)
+#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20)
/* The high three bits indicates socketid */
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
@@ -98,8 +98,10 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__JPEG,
AMDGPU_RAS_BLOCK__IH,
AMDGPU_RAS_BLOCK__MPIO,
+ AMDGPU_RAS_BLOCK__MMSCH,
- AMDGPU_RAS_BLOCK__LAST
+ AMDGPU_RAS_BLOCK__LAST,
+ AMDGPU_RAS_BLOCK__ANY = -1
};
enum amdgpu_ras_mca_block {
@@ -365,6 +367,7 @@ enum amdgpu_ras_error_query_mode {
AMDGPU_RAS_INVALID_ERROR_QUERY = 0,
AMDGPU_RAS_DIRECT_ERROR_QUERY = 1,
AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2,
+ AMDGPU_RAS_VIRT_ERROR_COUNT_QUERY = 3,
};
/* ras error status reisger fields */
@@ -481,6 +484,8 @@ struct ras_ecc_err {
uint64_t ipid;
uint64_t addr;
uint64_t pa_pfn;
+ /* save global channel index across all UMC instances */
+ uint32_t channel_idx;
struct ras_err_pages err_pages;
};
@@ -557,8 +562,8 @@ struct amdgpu_ras {
struct ras_ecc_log_info umc_ecc_log;
struct delayed_work page_retirement_dwork;
- /* Fatal error detected flag */
- atomic_t fed;
+ /* ras errors detected */
+ unsigned long ras_err_state;
/* RAS event manager */
struct ras_event_manager __event_mgr;
@@ -736,8 +741,8 @@ struct amdgpu_ras_block_hw_ops {
* 8: feature disable
*/
-
-int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
+int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev);
+int amdgpu_ras_recovery_init(struct amdgpu_device *adev, bool init_bp_info);
void amdgpu_ras_resume(struct amdgpu_device *adev);
void amdgpu_ras_suspend(struct amdgpu_device *adev);
@@ -749,7 +754,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* error handling functions */
int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
- struct eeprom_table_record *bps, int pages);
+ struct eeprom_table_record *bps, int pages, bool from_rom);
int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
unsigned long *new_cnt);
@@ -791,6 +796,12 @@ amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) {
return TA_RAS_BLOCK__VCN;
case AMDGPU_RAS_BLOCK__JPEG:
return TA_RAS_BLOCK__JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return TA_RAS_BLOCK__IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return TA_RAS_BLOCK__MPIO;
+ case AMDGPU_RAS_BLOCK__MMSCH:
+ return TA_RAS_BLOCK__MMSCH;
default:
WARN_ONCE(1, "RAS ERROR: unexpected block id %d\n", block);
return TA_RAS_BLOCK__UMC;
@@ -951,6 +962,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+void amdgpu_ras_set_err_poison(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_ras_clear_err_state(struct amdgpu_device *adev);
+bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block);
u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index f28f6b4ba765..2c58e09e56f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return true;
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return (adev->gmc.is_app_apu) ? false : true;
default:
@@ -177,7 +178,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
if (!control)
return false;
- if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+ if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
/* The address given by VBIOS is an 8-bit, wire-format
* address, i.e. the most significant byte.
*
@@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
@@ -413,9 +415,12 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control
switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
case IP_VERSION(8, 10, 0):
- case IP_VERSION(12, 0, 0):
hdr->version = RAS_TABLE_VER_V2_1;
return;
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
+ hdr->version = RAS_TABLE_VER_V3;
+ return;
default:
hdr->version = RAS_TABLE_VER_V1;
return;
@@ -443,7 +448,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
hdr->header = RAS_TABLE_HDR_VAL;
amdgpu_ras_set_eeprom_table_version(control);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE;
@@ -461,7 +466,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
}
csum = __calc_hdr_byte_sum(control);
- if (hdr->version == RAS_TABLE_VER_V2_1)
+ if (hdr->version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
csum = -csum;
hdr->checksum = csum;
@@ -470,9 +475,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
res = __write_table_ras_info(control);
control->ras_num_recs = 0;
+ control->ras_num_bad_pages = 0;
control->ras_fri = 0;
- amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages);
control->bad_channel_bitmap = 0;
amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
@@ -557,16 +563,17 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
return false;
if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
- if (amdgpu_bad_page_threshold == -1) {
+ if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
- con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
+ con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
dev_warn(adev->dev,
- "But GPU can be operated due to bad_page_threshold = -1.\n");
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n");
return false;
} else {
- dev_warn(adev->dev, "This GPU is in BAD status.");
- dev_warn(adev->dev, "Please retire it or set a larger "
- "threshold value when reloading driver.\n");
+ dev_warn(adev->dev,
+ "Please consider adjusting the customized threshold.\n");
return true;
}
}
@@ -621,6 +628,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
+ struct amdgpu_device *adev = to_amdgpu_device(control);
u32 a, b, i;
u8 *buf, *pp;
int res;
@@ -723,6 +731,15 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
control->ras_num_recs = 1 + (control->ras_max_record_count + b
- control->ras_fri)
% control->ras_max_record_count;
+
+ /*old asics only save pa to eeprom like before*/
+ if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
+ control->ras_num_pa_recs += num;
+ else
+ control->ras_num_mca_recs += num;
+
+ control->ras_num_bad_pages = control->ras_num_pa_recs +
+ control->ras_num_mca_recs * adev->umc.retire_unit;
Out:
kfree(buf);
return res;
@@ -740,24 +757,25 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_recs >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
control->tbl_rai.health_percent = 0;
}
- if (amdgpu_bad_page_threshold != -1)
+ if ((amdgpu_bad_page_threshold != -1) &&
+ (amdgpu_bad_page_threshold != -2))
ras->is_rma = true;
/* ignore the -ENOTSUPP return value */
amdgpu_dpm_send_rma_reason(adev);
}
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -797,10 +815,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
* now calculate gpu health percent
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
- control->ras_num_recs < ras->bad_page_cnt_threshold)
+ control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
- control->ras_num_recs) * 100) /
+ control->ras_num_bad_pages) * 100) /
ras->bad_page_cnt_threshold;
/* Recalc the checksum.
@@ -810,7 +828,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
csum += *pp;
csum += __calc_hdr_byte_sum(control);
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
csum += __calc_ras_info_byte_sum(control);
/* avoid sign extension when assigning to "checksum" */
csum = -csum;
@@ -841,7 +859,8 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
const u32 num)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
- int res;
+ int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -855,6 +874,13 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
return -EINVAL;
}
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* set the new channel index flag */
+ for (i = 0; i < num; i++)
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
+
mutex_lock(&control->ras_tbl_mutex);
res = amdgpu_ras_eeprom_append_table(control, record, num);
@@ -864,6 +890,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
amdgpu_ras_debugfs_set_ret_size(control);
mutex_unlock(&control->ras_tbl_mutex);
+
+ /* clear channel index flag, the flag is only saved on eeprom */
+ for (i = 0; i < num; i++)
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
+
return res;
}
@@ -1014,7 +1045,7 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co
/* get available eeprom table version first before eeprom table init */
amdgpu_ras_set_eeprom_table_version(control);
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
return RAS_MAX_RECORD_COUNT_V2_1;
else
return RAS_MAX_RECORD_COUNT;
@@ -1259,7 +1290,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
int buf_size, res;
u8 csum, *buf, *pp;
- if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+ if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
buf_size = RAS_TABLE_HEADER_SIZE +
RAS_TABLE_V2_1_INFO_SIZE +
control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -1362,22 +1393,69 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
__decode_table_header_from_buf(hdr, buf);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->header != RAS_TABLE_HDR_VAL &&
+ hdr->header != RAS_TABLE_HDR_BAD) {
+ dev_info(adev->dev, "Creating a new EEPROM table");
+ return amdgpu_ras_eeprom_reset_table(control);
+ }
+
+ switch (hdr->version) {
+ case RAS_TABLE_VER_V2_1:
+ case RAS_TABLE_VER_V3:
control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
control->ras_record_offset = RAS_RECORD_START_V2_1;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
- } else {
+ break;
+ case RAS_TABLE_VER_V1:
control->ras_num_recs = RAS_NUM_RECS(hdr);
control->ras_record_offset = RAS_RECORD_START;
control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
+ break;
+ default:
+ dev_err(adev->dev,
+ "RAS header invalid, unsupported version: %u",
+ hdr->version);
+ return -EINVAL;
}
+
+ if (control->ras_num_recs > control->ras_max_record_count) {
+ dev_err(adev->dev,
+ "RAS header invalid, records in header: %u max allowed :%u",
+ control->ras_num_recs, control->ras_max_record_count);
+ return -EINVAL;
+ }
+
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
+ return 0;
+}
+
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ int res = 0;
+
+ if (!__is_ras_eeprom_supported(adev))
+ return 0;
+
+ /* Verify i2c adapter is initialized */
+ if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
+ return -ENOENT;
+
+ if (!__get_eeprom_i2c_addr(adev, control))
+ return -EINVAL;
+
+ control->ras_num_bad_pages = control->ras_num_pa_recs +
+ control->ras_num_mca_recs * adev->umc.retire_unit;
if (hdr->header == RAS_TABLE_HDR_VAL) {
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
- control->ras_num_recs);
+ control->ras_num_bad_pages);
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
@@ -1385,28 +1463,32 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
res = __verify_ras_table_checksum(control);
if (res)
- DRM_ERROR("RAS table incorrect checksum or error:%d\n",
- res);
+ dev_err(adev->dev,
+ "RAS table incorrect checksum or error:%d\n",
+ res);
/* Warn if we are at 90% of the threshold or above
*/
- if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
+ if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
} else if (hdr->header == RAS_TABLE_HDR_BAD &&
amdgpu_bad_page_threshold != 0) {
- if (hdr->version == RAS_TABLE_VER_V2_1) {
+ if (hdr->version >= RAS_TABLE_VER_V2_1) {
res = __read_table_ras_info(control);
if (res)
return res;
}
res = __verify_ras_table_checksum(control);
- if (res)
- DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
- res);
- if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
+ if (res) {
+ dev_err(adev->dev,
+ "RAS Table incorrect checksum or error:%d\n",
+ res);
+ return -EINVAL;
+ }
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,
@@ -1416,28 +1498,25 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
dev_info(adev->dev,
"records:%d threshold:%d, resetting "
"RAS table header signature",
- control->ras_num_recs,
+ control->ras_num_bad_pages,
ras->bad_page_cnt_threshold);
res = amdgpu_ras_eeprom_correct_header_tag(control,
RAS_TABLE_HDR_VAL);
} else {
- dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
- if (amdgpu_bad_page_threshold == -1) {
- dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
+ dev_warn(adev->dev,
+ "RAS records:%d exceed threshold:%d\n",
+ control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ if ((amdgpu_bad_page_threshold == -1) ||
+ (amdgpu_bad_page_threshold == -2)) {
res = 0;
+ dev_warn(adev->dev,
+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
} else {
ras->is_rma = true;
- dev_err(adev->dev,
- "RAS records:%d exceed threshold:%d, "
- "GPU will not be initialized. Replace this GPU or increase the threshold",
- control->ras_num_recs, ras->bad_page_cnt_threshold);
+ dev_warn(adev->dev,
+ "User defined threshold is set, runtime service will be halt when threshold is reached\n");
}
}
- } else {
- DRM_INFO("Creating a new EEPROM table");
-
- res = amdgpu_ras_eeprom_reset_table(control);
}
return res < 0 ? res : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index b9ebda577797..ec6d7ea37ad0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -28,6 +28,7 @@
#define RAS_TABLE_VER_V1 0x00010000
#define RAS_TABLE_VER_V2_1 0x00021000
+#define RAS_TABLE_VER_V3 0x00030000
struct amdgpu_device;
@@ -82,6 +83,17 @@ struct amdgpu_ras_eeprom_control {
*/
u32 ras_num_recs;
+ /* the bad page number is ras_num_recs or
+ * ras_num_recs * umc.retire_unit
+ */
+ u32 ras_num_bad_pages;
+
+ /* Number of records store mca address */
+ u32 ras_num_mca_recs;
+
+ /* Number of records store physical address */
+ u32 ras_num_pa_recs;
+
/* First record index to read, 0-based.
* Range is [0, num_recs-1]. This is
* an absolute index, starting right after
@@ -145,6 +157,8 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);
+int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control);
+
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 66c1a868c0e1..dabfbdf6f1ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -26,6 +26,156 @@
#include "sienna_cichlid.h"
#include "smu_v13_0_10.h"
+static int amdgpu_reset_xgmi_reset_on_init_suspend(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ if (!adev->ip_blocks[i].status.hw)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
+
+ /* XXX handle errors */
+ amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ adev->ip_blocks[i].status.hw = false;
+ }
+
+ /* VCN FW shared region is in frambuffer, there are some flags
+ * initialized in that region during sw_init. Make sure the region is
+ * backed up.
+ */
+ amdgpu_vcn_save_vcpu_bo(adev);
+
+ return 0;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_prep_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev;
+ int r;
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ amdgpu_unregister_gpu_instance(tmp_adev);
+ r = amdgpu_reset_xgmi_reset_on_init_suspend(tmp_adev);
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: prepare for reset failed");
+ return r;
+ }
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_restore_hwctxt(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ r = amdgpu_device_reinit_after_reset(reset_context);
+ if (r)
+ return r;
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ if (!tmp_adev->kfd.init_complete) {
+ kgd2kfd_init_zone_device(tmp_adev);
+ amdgpu_amdkfd_device_init(tmp_adev);
+ amdgpu_amdkfd_drm_client_create(tmp_adev);
+ }
+ }
+
+ return r;
+}
+
+static int amdgpu_reset_xgmi_reset_on_init_perform_reset(
+ struct amdgpu_reset_control *reset_ctl,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *tmp_adev = NULL;
+ int r;
+
+ dev_dbg(adev->dev, "xgmi roi - hw reset\n");
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_lock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset =
+ amdgpu_asic_reset_method(adev);
+ }
+ r = 0;
+ /* Mode1 reset needs to be triggered on all devices together */
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ /* For XGMI run all resets in parallel to speed up the process */
+ if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
+ r = -EALREADY;
+ if (r) {
+ dev_err(tmp_adev->dev,
+ "xgmi reset on init: reset failed with error, %d",
+ r);
+ break;
+ }
+ }
+
+ /* For XGMI wait for all resets to complete before proceed */
+ if (!r) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ flush_work(&tmp_adev->xgmi_reset_work);
+ r = tmp_adev->asic_reset_res;
+ if (r)
+ break;
+ }
+ }
+
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+ mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
+ tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
+ }
+
+ return r;
+}
+
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context)
+{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
+ struct amdgpu_device *adev;
+ int r;
+
+ if (!reset_device_list || list_empty(reset_device_list) ||
+ list_is_singular(reset_device_list))
+ return -EINVAL;
+
+ adev = list_first_entry(reset_device_list, struct amdgpu_device,
+ reset_list);
+ r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+ if (r)
+ return r;
+
+ r = amdgpu_reset_perform_reset(adev, reset_context);
+
+ return r;
+}
+
+struct amdgpu_reset_handler xgmi_reset_on_init_handler = {
+ .reset_method = AMD_RESET_METHOD_ON_INIT,
+ .prepare_env = NULL,
+ .prepare_hwcontext = amdgpu_reset_xgmi_reset_on_init_prep_hwctxt,
+ .perform_reset = amdgpu_reset_xgmi_reset_on_init_perform_reset,
+ .restore_hwcontext = amdgpu_reset_xgmi_reset_on_init_restore_hwctxt,
+ .restore_env = NULL,
+ .do_reset = NULL,
+};
+
int amdgpu_reset_init(struct amdgpu_device *adev)
{
int ret = 0;
@@ -33,6 +183,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_init(adev);
break;
@@ -56,6 +207,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_fini(adev);
break;
@@ -192,3 +344,8 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
strscpy(buf, "unknown", len);
}
}
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+ return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 1cb920abc2fe..4d9b9701139b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -153,4 +153,11 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
(handler = (*reset_ctl->reset_handlers)[i]); \
++i)
+
+extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
+int amdgpu_reset_do_xgmi_reset_on_init(
+ struct amdgpu_reset_context *reset_context);
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 690976665cf6..426834806fbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -108,10 +108,22 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
*/
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
+ uint32_t occupied, chunk1, chunk2;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ occupied = ring->wptr & ring->buf_mask;
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
+
+ if (chunk1)
+ memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
+
+ if (chunk2)
+ memset32(ring->ring, ring->funcs->nop, chunk2);
+
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
}
/**
@@ -141,6 +153,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
uint32_t count;
+ if (ring->count_dw < 0)
+ DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
+
/* We pad to match fetch size */
count = ring->funcs->align_mask + 1 -
(ring->wptr & ring->funcs->align_mask);
@@ -172,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
}
#define amdgpu_ring_get_gpu_addr(ring, offset) \
- (ring->is_mes_queue ? \
- (ring->mes_ctx->meta_data_gpu_addr + offset) : \
- (ring->adev->wb.gpu_addr + offset * 4))
+ (ring->adev->wb.gpu_addr + offset * 4)
#define amdgpu_ring_get_cpu_addr(ring, offset) \
- (ring->is_mes_queue ? \
- (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
- (&ring->adev->wb.wb[offset]))
+ (&ring->adev->wb.wb[offset])
/**
* amdgpu_ring_init - init driver ring struct.
@@ -228,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->sched_score = sched_score;
ring->vmid_wait = dma_fence_get_stub();
- if (!ring->is_mes_queue) {
- ring->idx = adev->num_rings++;
- adev->rings[ring->idx] = ring;
- }
+ ring->idx = adev->num_rings++;
+ adev->rings[ring->idx] = ring;
r = amdgpu_fence_driver_init_ring(ring);
if (r)
return r;
}
- if (ring->is_mes_queue) {
- ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_RPTR_OFFS);
- ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_WPTR_OFFS);
- ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_FENCE_OFFS);
- ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
- ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_COND_EXE_OFFS);
- } else {
- r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->fence_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ return r;
+ }
- r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
- if (r) {
- dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
+ if (r) {
+ dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
+ return r;
}
ring->fence_gpu_addr =
@@ -309,38 +305,36 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
/* always set cond_exec_polling to CONTINUE */
*ring->cond_exe_cpu_addr = 1;
- r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
- if (r) {
- dev_err(adev->dev, "failed initializing fences (%d).\n", r);
- return r;
- }
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
+ if (r) {
+ dev_err(adev->dev, "failed initializing fences (%d).\n", r);
+ return r;
+ }
- max_ibs_dw = ring->funcs->emit_frame_size +
- amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
- max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
+ max_ibs_dw = ring->funcs->emit_frame_size +
+ amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
+ max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
- if (WARN_ON(max_ibs_dw > max_dw))
- max_dw = max_ibs_dw;
+ if (WARN_ON(max_ibs_dw > max_dw))
+ max_dw = max_ibs_dw;
- ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);
+ } else {
+ ring->ring_size = roundup_pow_of_two(max_dw * 4);
+ ring->count_dw = (ring->ring_size - 4) >> 2;
+ /* ring buffer is empty now */
+ ring->wptr = *ring->rptr_cpu_addr = 0;
+ }
ring->buf_mask = (ring->ring_size / 4) - 1;
ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
0xffffffffffffffff : ring->buf_mask;
+ /* Initialize cached_rptr to 0 */
+ ring->cached_rptr = 0;
/* Allocate ring buffer */
- if (ring->is_mes_queue) {
- int offset = 0;
-
- BUG_ON(ring->ring_size > PAGE_SIZE*4);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_RING_OFFS);
- ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- amdgpu_ring_clear_ring(ring);
-
- } else if (ring->ring_obj == NULL) {
+ if (ring->ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
@@ -377,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
/* Not to finish a ring which is not initialized */
- if (!(ring->adev) ||
- (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
+ if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
return;
ring->sched.ready = false;
- if (!ring->is_mes_queue) {
- amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
- amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
+ amdgpu_device_wb_free(ring->adev, ring->wptr_offs);
- amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
- amdgpu_device_wb_free(ring->adev, ring->fence_offs);
+ amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
+ amdgpu_device_wb_free(ring->adev, ring->fence_offs);
- amdgpu_bo_free_kernel(&ring->ring_obj,
- &ring->gpu_addr,
- (void **)&ring->ring);
- } else {
- kfree(ring->fence_drv.fences);
- }
+ amdgpu_bo_free_kernel(&ring->ring_obj,
+ &ring->gpu_addr,
+ (void **)&ring->ring);
dma_fence_put(ring->vmid_wait);
ring->vmid_wait = NULL;
ring->me = 0;
- if (!ring->is_mes_queue)
- ring->adev->rings[ring->idx] = NULL;
+ ring->adev->rings[ring->idx] = NULL;
}
/**
@@ -478,6 +466,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
uint32_t value, result, early[3];
+ uint64_t p;
loff_t i;
int r;
@@ -487,13 +476,18 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_lock(&ring->adev->cper.ring_lock);
+
early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
r = put_user(early[i], (uint32_t *)buf);
- if (r)
- return r;
+ if (r) {
+ result = r;
+ goto out;
+ }
buf += 4;
result += 4;
size -= 4;
@@ -501,86 +495,94 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
}
}
- while (size) {
- if (*pos >= (ring->ring_size + 12))
- return result;
+ if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) {
+ while (size) {
+ if (*pos >= (ring->ring_size + 12))
+ return result;
- value = ring->ring[(*pos - 12)/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- return r;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
+ value = ring->ring[(*pos - 12)/4];
+ r = put_user(value, (uint32_t *)buf);
+ if (r)
+ return r;
+ buf += 4;
+ result += 4;
+ size -= 4;
+ *pos += 4;
+ }
+ } else {
+ p = early[0];
+ if (early[0] <= early[1])
+ size = (early[1] - early[0]);
+ else
+ size = ring->ring_size - (early[0] - early[1]);
+
+ while (size) {
+ if (p == early[1])
+ goto out;
+
+ value = ring->ring[p];
+ r = put_user(value, (uint32_t *)buf);
+ if (r) {
+ result = r;
+ goto out;
+ }
+
+ buf += 4;
+ result += 4;
+ size--;
+ p++;
+ p &= ring->ptr_mask;
+ }
}
+out:
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ mutex_unlock(&ring->adev->cper.ring_lock);
+
return result;
}
-static const struct file_operations amdgpu_debugfs_ring_fops = {
- .owner = THIS_MODULE,
- .read = amdgpu_debugfs_ring_read,
- .llseek = default_llseek
-};
-
-static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
- size_t size, loff_t *pos)
+static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
- volatile u32 *mqd;
- u32 *kbuf;
- int r, i;
- uint32_t value, result;
if (*pos & 3 || size & 3)
return -EINVAL;
- kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto err_free;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
+ amdgpu_virt_req_ras_cper_dump(ring->adev, false);
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
- if (r)
- goto err_unreserve;
+ return amdgpu_debugfs_ring_read(f, buf, size, pos);
+}
- /*
- * Copy to local buffer to avoid put_user(), which might fault
- * and acquire mmap_sem, under reservation_ww_class_mutex.
- */
- for (i = 0; i < ring->mqd_size/sizeof(u32); i++)
- kbuf[i] = mqd[i];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_ring_read,
+ .llseek = default_llseek
+};
- amdgpu_bo_kunmap(ring->mqd_obj);
- amdgpu_bo_unreserve(ring->mqd_obj);
+static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_virt_ring_read,
+ .llseek = default_llseek
+};
- result = 0;
- while (size) {
- if (*pos >= ring->mqd_size)
- break;
+static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_ring *ring = file_inode(f)->i_private;
+ ssize_t bytes = min_t(ssize_t, ring->mqd_size - *pos, size);
+ void *from = ((u8 *)ring->mqd_ptr) + *pos;
- value = kbuf[*pos/4];
- r = put_user(value, (uint32_t *)buf);
- if (r)
- goto err_free;
- buf += 4;
- result += 4;
- size -= 4;
- *pos += 4;
- }
+ if (*pos > ring->mqd_size)
+ return 0;
- kfree(kbuf);
- return result;
+ if (copy_to_user(buf, from, bytes))
+ return -EFAULT;
-err_unreserve:
- amdgpu_bo_unreserve(ring->mqd_obj);
-err_free:
- kfree(kbuf);
- return r;
+ *pos += bytes;
+ return bytes;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
@@ -611,9 +613,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
char name[32];
sprintf(name, "amdgpu_ring_%s", ring->name);
- debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
- &amdgpu_debugfs_ring_fops,
- ring->ring_size + 12);
+ if (amdgpu_sriov_vf(adev))
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_virt_ring_fops,
+ ring->ring_size + 12);
+ else
+ debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
+ &amdgpu_debugfs_ring_fops,
+ ring->ring_size + 12);
if (ring->mqd_obj) {
sprintf(name, "amdgpu_mqd_%s", ring->name);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index f93f51002201..e1f25218943a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,7 +37,7 @@ struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
-#define AMDGPU_MAX_RINGS 124
+#define AMDGPU_MAX_RINGS 149
#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
@@ -82,6 +82,7 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_MES,
AMDGPU_RING_TYPE_UMSCH_MM,
+ AMDGPU_RING_TYPE_CPER,
};
enum amdgpu_ib_pool_type {
@@ -126,6 +127,22 @@ struct amdgpu_fence_driver {
struct dma_fence **fences;
};
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization. When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+ struct dma_fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+ ktime_t start_timestamp;
+};
+
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
@@ -163,8 +180,24 @@ void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
+ /**
+ * @type:
+ *
+ * GFX, Compute, SDMA, UVD, VCE, VCN, VPE, KIQ, MES, UMSCH, and CPER
+ * use ring buffers. The type field just identifies which component the
+ * ring buffer is associated with.
+ */
enum amdgpu_ring_type type;
uint32_t align_mask;
+
+ /**
+ * @nop:
+ *
+ * Every block in the amdgpu has no-op instructions (e.g., GFX 10
+ * uses PACKET3(PACKET3_NOP, 0x3FFF), VCN 5 uses VCN_ENC_CMD_NO_OP,
+ * etc). This field receives the specific no-op for the component
+ * that initializes the ring.
+ */
u32 nop;
bool support_64bit_ptrs;
bool no_user_fence;
@@ -237,8 +270,12 @@ struct amdgpu_ring_funcs {
void (*patch_de)(struct amdgpu_ring *ring, unsigned offset);
int (*reset)(struct amdgpu_ring *ring, unsigned int vmid);
void (*emit_cleaner_shader)(struct amdgpu_ring *ring);
+ bool (*is_guilty)(struct amdgpu_ring *ring);
};
+/**
+ * amdgpu_ring - Holds ring information
+ */
struct amdgpu_ring {
struct amdgpu_device *adev;
const struct amdgpu_ring_funcs *funcs;
@@ -246,17 +283,65 @@ struct amdgpu_ring {
struct drm_gpu_scheduler sched;
struct amdgpu_bo *ring_obj;
- volatile uint32_t *ring;
+ uint32_t *ring;
unsigned rptr_offs;
u64 rptr_gpu_addr;
volatile u32 *rptr_cpu_addr;
+
+ /**
+ * @wptr:
+ *
+ * This is part of the Ring buffer implementation and represents the
+ * write pointer. The wptr determines where the host has written.
+ */
u64 wptr;
+
+ /**
+ * @wptr_old:
+ *
+ * Before update wptr with the new value, usually the old value is
+ * stored in the wptr_old.
+ */
u64 wptr_old;
unsigned ring_size;
+
+ /**
+ * @max_dw:
+ *
+ * Maximum number of DWords for ring allocation. This information is
+ * provided at the ring initialization time, and each IP block can
+ * specify a specific value. Check places that invoke
+ * amdgpu_ring_init() to see the maximum size per block.
+ */
unsigned max_dw;
+
+ /**
+ * @count_dw:
+ *
+ * This value starts with the maximum amount of DWords supported by the
+ * ring. This value is updated based on the ring manipulation.
+ */
int count_dw;
uint64_t gpu_addr;
+
+ /**
+ * @ptr_mask:
+ *
+ * Some IPs provide support for 64-bit pointers and others for 32-bit
+ * only; this behavior is component-specific and defined by the field
+ * support_64bit_ptr. If the IP block supports 64-bits, the mask
+ * 0xffffffffffffffff is set; otherwise, this value assumes buf_mask.
+ * Notice that this field is used to keep wptr under a valid range.
+ */
uint64_t ptr_mask;
+
+ /**
+ * @buf_mask:
+ *
+ * Buffer mask is a value used to keep wptr count under its
+ * thresholding. Buffer mask initialized during the ring buffer
+ * initialization time, and it is defined as (ring_size / 4) -1.
+ */
uint32_t buf_mask;
u32 idx;
u32 xcc_id;
@@ -274,6 +359,13 @@ struct amdgpu_ring {
bool use_pollmem;
unsigned wptr_offs;
u64 wptr_gpu_addr;
+
+ /**
+ * @wptr_cpu_addr:
+ *
+ * This is the CPU address pointer in the writeback slot. This is used
+ * to commit changes to the GPU.
+ */
volatile u32 *wptr_cpu_addr;
unsigned fence_offs;
u64 fence_gpu_addr;
@@ -288,25 +380,22 @@ struct amdgpu_ring {
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
unsigned int set_q_mode_offs;
- volatile u32 *set_q_mode_ptr;
+ u32 *set_q_mode_ptr;
u64 set_q_mode_token;
unsigned vm_hub;
unsigned vm_inv_eng;
struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
bool no_scheduler;
+ bool no_user_submission;
int hw_prio;
unsigned num_hw_submission;
atomic_t *sched_score;
- /* used for mes */
- bool is_mes_queue;
- uint32_t hw_queue_id;
- struct amdgpu_mes_ctx_data *mes_ctx;
-
bool is_sw_ring;
unsigned int entry_index;
-
+ /* store the cached rptr to restore after reset */
+ uint64_t cached_rptr;
};
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@@ -377,8 +466,6 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
- if (ring->count_dw <= 0)
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
ring->ring[ring->wptr++ & ring->buf_mask] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
@@ -388,13 +475,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
- void *dst;
-
- if (unlikely(ring->count_dw < count_dw))
- DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
occupied = ring->wptr & ring->buf_mask;
- dst = (void *)&ring->ring[occupied];
chunk1 = ring->buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
chunk2 = count_dw - chunk1;
@@ -402,12 +484,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
chunk2 <<= 2;
if (chunk1)
- memcpy(dst, src, chunk1);
+ memcpy(&ring->ring[occupied], src, chunk1);
if (chunk2) {
src += chunk1;
- dst = (void *)ring->ring;
- memcpy(dst, src, chunk2);
+ memcpy(ring->ring, src, chunk2);
}
ring->wptr += count_dw;
@@ -439,15 +520,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
ring->ring[offset] = cur - offset;
}
-#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \
- (ring->is_mes_queue && ring->mes_ctx ? \
- (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
-
-#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \
- (ring->is_mes_queue && ring->mes_ctx ? \
- (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
- NULL)
-
int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
@@ -470,8 +542,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size,
enum amdgpu_ib_pool_type pool,
struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
- struct dma_fence *f);
+void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ibs, struct amdgpu_job *job,
struct dma_fence **f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index 1c66da1c3fb4..7e7d6c3865bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -124,7 +124,7 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
}
}
- del_timer(&mux->resubmit_timer);
+ timer_delete(&mux->resubmit_timer);
mux->s_resubmit = false;
}
@@ -135,7 +135,8 @@ static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
{
- struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+ struct amdgpu_ring_mux *mux = timer_container_of(mux, t,
+ resubmit_timer);
if (!spin_trylock(&mux->lock)) {
amdgpu_ring_mux_schedule_resubmit(mux);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index fce22d3f816b..c210625be220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -237,6 +237,20 @@ struct amdgpu_rlc_funcs {
void (*unset_safe_mode)(struct amdgpu_device *adev, int xcc_id);
int (*init)(struct amdgpu_device *adev);
u32 (*get_csb_size)(struct amdgpu_device *adev);
+
+ /**
+ * @get_csb_buffer: Get the clear state to be put into the hardware.
+ *
+ * The parameter adev is used to get the CS data and other gfx info,
+ * and buffer is the RLC CS pointer
+ *
+ * Sometimes, the user space puts a request to clear the state in the
+ * command buffer; this function provides the clear state that gets put
+ * into the hardware. Note that the driver programs Clear State
+ * Indirect Buffer (CSB) explicitly when it sets up the kernel rings,
+ * and it also provides a pointer to it which is used by the firmware
+ * to load the clear state in some cases.
+ */
void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer);
int (*get_cp_table_num)(struct amdgpu_device *adev);
int (*resume)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 10df731998b2..39070b2a4c04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
return 0;
}
-void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo,
- struct dma_fence *fence)
+void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence)
{
if (sa_bo == NULL || *sa_bo == NULL) {
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index b0a8abc7a8ec..341beec59537 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -35,21 +35,19 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx_mgr *mgr;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EINVAL;
r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
- if (r) {
- fdput(f);
+ if (r)
return r;
- }
mgr = &fpriv->ctx_mgr;
mutex_lock(&mgr->lock);
@@ -57,7 +55,6 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
amdgpu_ctx_priority_override(ctx, priority);
mutex_unlock(&mgr->lock);
- fdput(f);
return 0;
}
@@ -66,31 +63,25 @@ static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
unsigned ctx_id,
int32_t priority)
{
- struct fd f = fdget(fd);
+ CLASS(fd, f)(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
- if (!fd_file(f))
+ if (fd_empty(f))
return -EINVAL;
r = amdgpu_file_to_fpriv(fd_file(f), &fpriv);
- if (r) {
- fdput(f);
+ if (r)
return r;
- }
ctx = amdgpu_ctx_get(fpriv, ctx_id);
- if (!ctx) {
- fdput(f);
+ if (!ctx)
return -EINVAL;
- }
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
- fdput(f);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 183a976ba29d..9b54a1ece447 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -25,6 +25,9 @@
#include "amdgpu.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_3_0_sh_mask.h"
#define AMDGPU_CSA_SDMA_SIZE 64
/* SDMA CSA reside in the 3rd page of CSA */
@@ -75,22 +78,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp)
return 0;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
+ r = amdgpu_sdma_get_index_from_ring(ring, &index);
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- sdma[ring->idx].sdma_meta_data);
- csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- r = amdgpu_sdma_get_index_from_ring(ring, &index);
-
- if (r || index > 31)
- csa_mc_addr = 0;
- else
- csa_mc_addr = amdgpu_csa_vaddr(adev) +
- AMDGPU_CSA_SDMA_OFFSET +
- index * AMDGPU_CSA_SDMA_SIZE;
- }
+ if (r || index > 31)
+ csa_mc_addr = 0;
+ else
+ csa_mc_addr = amdgpu_csa_vaddr(adev) +
+ AMDGPU_CSA_SDMA_OFFSET +
+ index * AMDGPU_CSA_SDMA_SIZE;
return csa_mc_addr;
}
@@ -219,9 +214,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix));
if (instance == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s.bin", ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s%d.bin", ucode_prefix, instance);
if (err)
goto out;
@@ -261,6 +258,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 2) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
+ IP_VERSION(4, 4, 4) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 5)) &&
adev->firmware.load_type ==
AMDGPU_FW_LOAD_PSP &&
@@ -343,3 +342,288 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+/*
+ * debugfs for to enable/disable sdma job submission to specific core.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ /* Calculate the maximum possible mask value
+ * based on the number of SDMA instances and rings
+ */
+ mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1;
+
+ if ((val & mask) == 0)
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+ if (val & BIT_ULL(i * num_ring))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+
+ if (page) {
+ if (val & BIT_ULL(i * num_ring + 1))
+ page->sched.ready = true;
+ else
+ page->sched.ready = false;
+ }
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u64 i, num_ring;
+ u64 mask = 0;
+ struct amdgpu_ring *ring, *page = NULL;
+
+ if (!adev)
+ return -ENODEV;
+
+ /* Determine the number of rings per SDMA instance
+ * (1 for sdma gfx ring, 2 if page queue exists)
+ */
+ if (adev->sdma.has_page_queue)
+ num_ring = 2;
+ else
+ num_ring = 1;
+
+ for (i = 0; i < adev->sdma.num_instances; ++i) {
+ ring = &adev->sdma.instance[i].ring;
+ if (adev->sdma.has_page_queue)
+ page = &adev->sdma.instance[i].page;
+
+ if (ring->sched.ready)
+ mask |= BIT_ULL(i * num_ring);
+ else
+ mask &= ~BIT_ULL(i * num_ring);
+
+ if (page) {
+ if (page->sched.ready)
+ mask |= BIT_ULL(i * num_ring + 1);
+ else
+ mask &= ~BIT_ULL(i * num_ring + 1);
+ }
+ }
+
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops,
+ amdgpu_debugfs_sdma_sched_mask_get,
+ amdgpu_debugfs_sdma_sched_mask_set, "%llx\n");
+
+#endif
+
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (!(adev->sdma.num_instances > 1))
+ return;
+ sprintf(name, "amdgpu_sdma_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_sdma_sched_mask_fops);
+#endif
+}
+
+static ssize_t amdgpu_get_sdma_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->sdma.supported_reset);
+}
+
+static DEVICE_ATTR(sdma_reset_mask, 0444,
+ amdgpu_get_sdma_reset_mask, NULL);
+
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (!amdgpu_gpu_recovery)
+ return r;
+
+ if (adev->sdma.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_sdma_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_gpu_recovery)
+ return;
+
+ if (adev->dev->kobj.sd) {
+ if (adev->sdma.num_instances)
+ device_remove_file(adev->dev, &dev_attr_sdma_reset_mask);
+ }
+}
+
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (adev->sdma.has_page_queue &&
+ (ring->me < adev->sdma.num_instances) &&
+ (ring == &adev->sdma.instance[ring->me].ring))
+ return &adev->sdma.instance[ring->me].page;
+ else
+ return NULL;
+}
+
+/**
+* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+* @adev: Pointer to the AMDGPU device structure
+* @ring: Pointer to the ring structure to check
+*
+* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
+* It returns true if the ring is such an SDMA ring, false otherwise.
+*/
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ int i = ring->me;
+
+ if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+ return false;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ return (ring == &adev->sdma.instance[i].page);
+ else
+ return false;
+}
+
+static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id)
+{
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ int r = -EOPNOTSUPP;
+
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(4, 4, 2):
+ case IP_VERSION(4, 4, 4):
+ case IP_VERSION(4, 4, 5):
+ /* For SDMA 4.x, use the existing DPM interface for backward compatibility,
+ * we need to convert the logical instance ID to physical instance ID before reset.
+ */
+ r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id));
+ break;
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 1):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 4):
+ case IP_VERSION(5, 2, 5):
+ case IP_VERSION(5, 2, 6):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 1):
+ case IP_VERSION(5, 2, 7):
+ if (sdma_instance->funcs->soft_reset_kernel_queue)
+ r = sdma_instance->funcs->soft_reset_kernel_queue(adev, instance_id);
+ break;
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/**
+ * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
+ * @adev: Pointer to the AMDGPU device
+ * @instance_id: Logical ID of the SDMA engine instance to reset
+ *
+ * Returns: 0 on success, or a negative error code on failure.
+ */
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
+{
+ int ret = 0;
+ struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+ struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+ struct amdgpu_ring *page_ring = &sdma_instance->page;
+ bool gfx_sched_stopped = false, page_sched_stopped = false;
+
+ mutex_lock(&sdma_instance->engine_reset_mutex);
+ /* Stop the scheduler's work queue for the GFX and page rings if they are running.
+ * This ensures that no new tasks are submitted to the queues while
+ * the reset is in progress.
+ */
+ if (!amdgpu_ring_sched_ready(gfx_ring)) {
+ drm_sched_wqueue_stop(&gfx_ring->sched);
+ gfx_sched_stopped = true;
+ }
+
+ if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) {
+ drm_sched_wqueue_stop(&page_ring->sched);
+ page_sched_stopped = true;
+ }
+
+ if (sdma_instance->funcs->stop_kernel_queue)
+ sdma_instance->funcs->stop_kernel_queue(gfx_ring);
+
+ /* Perform the SDMA reset for the specified instance */
+ ret = amdgpu_sdma_soft_reset(adev, instance_id);
+ if (ret) {
+ dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id);
+ goto exit;
+ }
+
+ if (sdma_instance->funcs->start_kernel_queue)
+ sdma_instance->funcs->start_kernel_queue(gfx_ring);
+
+exit:
+ /* Restart the scheduler's work queue for the GFX and page rings
+ * if they were stopped by this function. This allows new tasks
+ * to be submitted to the queues after the reset is complete.
+ */
+ if (!ret) {
+ if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
+ drm_sched_wqueue_start(&gfx_ring->sched);
+ }
+ if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) {
+ drm_sched_wqueue_start(&page_ring->sched);
+ }
+ }
+ mutex_unlock(&sdma_instance->engine_reset_mutex);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 087ce0f6fa07..e5f8951bbb6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -50,6 +50,12 @@ enum amdgpu_sdma_irq {
#define NUM_SDMA(x) hweight32(x)
+struct amdgpu_sdma_funcs {
+ int (*stop_kernel_queue)(struct amdgpu_ring *ring);
+ int (*start_kernel_queue)(struct amdgpu_ring *ring);
+ int (*soft_reset_kernel_queue)(struct amdgpu_device *adev, u32 instance_id);
+};
+
struct amdgpu_sdma_instance {
/* SDMA firmware */
const struct firmware *fw;
@@ -64,6 +70,11 @@ struct amdgpu_sdma_instance {
struct amdgpu_bo *sdma_fw_obj;
uint64_t sdma_fw_gpu_addr;
uint32_t *sdma_fw_ptr;
+ struct mutex engine_reset_mutex;
+ /* track guilty state of GFX and PAGE queues */
+ bool gfx_guilty;
+ bool page_guilty;
+ const struct amdgpu_sdma_funcs *funcs;
};
enum amdgpu_sdma_ras_memory_id {
@@ -102,11 +113,13 @@ struct amdgpu_sdma {
struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
+ struct amdgpu_irq_src fence_irq;
struct amdgpu_irq_src ecc_irq;
struct amdgpu_irq_src vm_hole_irq;
struct amdgpu_irq_src doorbell_invalid_irq;
struct amdgpu_irq_src pool_timeout_irq;
struct amdgpu_irq_src srbm_write_irq;
+ struct amdgpu_irq_src ctxt_empty_irq;
int num_instances;
uint32_t sdma_mask;
@@ -116,6 +129,10 @@ struct amdgpu_sdma {
struct ras_common_if *ras_if;
struct amdgpu_sdma_ras *ras;
uint32_t *ip_dump;
+ uint32_t supported_reset;
+ struct list_head reset_callback_list;
+ bool no_user_submission;
+ bool disable_uq;
};
/*
@@ -155,6 +172,8 @@ struct amdgpu_buffer_funcs {
uint32_t byte_count);
};
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
+
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
@@ -175,5 +194,10 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance,
void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
bool duplicate);
int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
-
+void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
+int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index e22cb2b5cd92..d45ebfb642ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -45,7 +45,11 @@
*/
static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
{
- return AMDGPU_VA_RESERVED_SEQ64_START(adev);
+ u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev);
+
+ addr = amdgpu_gmc_sign_extend(addr);
+
+ return addr;
}
/**
@@ -63,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
+ u64 seq64_addr, va_flags;
struct amdgpu_bo *bo;
struct drm_exec exec;
- u64 seq64_addr;
int r;
bo = adev->seq64.sbo;
@@ -88,9 +92,11 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
goto error;
}
- seq64_addr = amdgpu_seq64_get_va_base(adev);
+ seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+ va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
- AMDGPU_PTE_READABLE);
+ va_flags);
if (r) {
DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
amdgpu_vm_bo_del(adev, *bo_va);
@@ -133,7 +139,7 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv)
vm = &fpriv->vm;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
@@ -156,6 +162,7 @@ error:
*
* @adev: amdgpu_device pointer
* @va: VA to access the seq in process address space
+ * @gpu_addr: GPU address to access the seq
* @cpu_addr: CPU address to access the seq
*
* Alloc a 64 bit memory from seq64 pool.
@@ -163,7 +170,8 @@ error:
* Returns:
* 0 on success or a negative error code on failure
*/
-int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr)
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va,
+ u64 *gpu_addr, u64 **cpu_addr)
{
unsigned long bit_pos;
@@ -172,7 +180,12 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr)
return -ENOSPC;
__set_bit(bit_pos, adev->seq64.used);
+
*va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev);
+
+ if (gpu_addr)
+ *gpu_addr = bit_pos * sizeof(u64) + adev->seq64.gpu_addr;
+
*cpu_addr = bit_pos + adev->seq64.cpu_base_addr;
return 0;
@@ -233,7 +246,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev)
*/
r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &adev->seq64.sbo, NULL,
+ &adev->seq64.sbo, &adev->seq64.gpu_addr,
(void **)&adev->seq64.cpu_base_addr);
if (r) {
dev_warn(adev->dev, "(%d) create seq64 failed\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
index 4203b2ab318d..26a249aaaee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h
@@ -32,13 +32,14 @@
struct amdgpu_seq64 {
struct amdgpu_bo *sbo;
u32 num_sem;
+ u64 gpu_addr;
u64 *cpu_base_addr;
DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS);
};
void amdgpu_seq64_fini(struct amdgpu_device *adev);
int amdgpu_seq64_init(struct amdgpu_device *adev);
-int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr);
+int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 *gpu_addr, u64 **cpu_addr);
void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr);
int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c586ab4c911b..d6ae9974c952 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
- if (unlikely(e->fence->context != f->context))
- continue;
+ if (dma_fence_is_signaled(e->fence)) {
+ dma_fence_put(e->fence);
+ e->fence = dma_fence_get(f);
+ return true;
+ }
- amdgpu_sync_keep_later(&e->fence, f);
- return true;
+ if (likely(e->fence->context == f->context)) {
+ amdgpu_sync_keep_later(&e->fence, f);
+ return true;
+ }
}
return false;
}
@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags)
{
struct amdgpu_sync_entry *e;
@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
- e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
@@ -242,14 +249,13 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
if (resv == NULL)
return -EINVAL;
-
- /* TODO: Use DMA_RESV_USAGE_READ here */
- dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
+ /* Implicitly sync only to KERNEL, WRITE and READ */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
dma_fence_chain_for_each(f, f) {
struct dma_fence *tmp = dma_fence_chain_contained(f);
if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
- r = amdgpu_sync_fence(sync, f);
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
dma_fence_put(f);
if (r)
return r;
@@ -281,7 +287,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
continue;
- r = amdgpu_sync_fence(sync, f);
+ r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
if (r)
break;
}
@@ -388,7 +394,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f);
+ r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
@@ -400,6 +406,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
}
/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+ unsigned int i;
+
+ amdgpu_sync_free(dst);
+
+ for (i = 0; i < HASH_SIZE(src->fences); ++i)
+ hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
+/**
* amdgpu_sync_push_to_job - push fences into job
* @sync: sync object to get the fences from
* @job: job to push the fences into
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index e3272dce798d..51eb4382c91e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,7 +47,8 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
@@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 383fce40d4dd..11dd2e0f7979 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -457,6 +457,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
TP_ARGS(pasid)
);
+TRACE_EVENT(amdgpu_isolation,
+ TP_PROTO(void *prev, void *next),
+ TP_ARGS(prev, next),
+ TP_STRUCT__entry(
+ __field(void *, prev)
+ __field(void *, next)
+ ),
+
+ TP_fast_assign(
+ __entry->prev = prev;
+ __entry->next = next;
+ ),
+ TP_printk("prev=%p, next=%p",
+ __entry->prev,
+ __entry->next)
+);
+
+TRACE_EVENT(amdgpu_cleaner_shader,
+ TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
+ TP_ARGS(ring, fence),
+ TP_STRUCT__entry(
+ __string(ring, ring->name)
+ __field(u64, seqno)
+ ),
+
+ TP_fast_assign(
+ __assign_str(ring);
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno)
+);
+
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 74adb983ab03..9c5df35f05b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -61,7 +61,7 @@
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"
-MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS("DMA_BUF");
#define AMDGPU_TTM_VRAM_MAX_DW_READ ((size_t)128)
@@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
uint64_t from, to, cur_size, tiling_flags;
- uint32_t num_type, data_format, max_com;
+ uint32_t num_type, data_format, max_com, write_compress_disable;
struct dma_fence *next;
/* Never copy more than 256MiB at once to avoid a timeout */
@@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ write_compress_disable =
+ AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
- AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format));
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) |
+ AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE,
+ write_compress_disable));
}
r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
@@ -812,7 +816,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
/* Map SG to device */
r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
if (r)
- goto release_sg;
+ goto release_sg_table;
/* convert SG to linear array of pages and dma addresses */
drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
@@ -820,6 +824,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
return 0;
+release_sg_table:
+ sg_free_table(ttm->sg);
release_sg:
kfree(ttm->sg);
ttm->sg = NULL;
@@ -1760,7 +1766,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
if (!adev->bios &&
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
@@ -1849,6 +1856,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
+ dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
adev_to_drm(adev)->anon_inode->i_mapping,
@@ -1956,10 +1964,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Compute GTT size, either based on TTM limit
* or whatever the user passed on module init.
*/
- if (amdgpu_gtt_size == -1)
- gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
- else
- gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT;
+ if (amdgpu_gtt_size != -1) {
+ uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20;
+
+ drm_warn(&adev->ddev,
+ "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n");
+ if (gtt_size != configured_size)
+ drm_warn(&adev->ddev,
+ "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n",
+ configured_size, gtt_size);
+
+ gtt_size = configured_size;
+ }
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
@@ -1970,6 +1987,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
(unsigned int)(gtt_size / (1024 * 1024)));
+ if (adev->flags & AMD_IS_APU) {
+ if (adev->gmc.real_vram_size < gtt_size)
+ adev->apu_prefer_gtt = true;
+ }
+
/* Initialize doorbell pool on PCI BAR */
r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE);
if (r) {
@@ -2059,9 +2081,12 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_vram_mgr_fini(adev);
amdgpu_gtt_mgr_fini(adev);
amdgpu_preempt_mgr_fini(adev);
+ amdgpu_doorbell_fini(adev);
+
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
@@ -2272,7 +2297,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor cursor;
u64 addr;
- int r;
+ int r = 0;
if (!adev->mman.buffer_funcs_enabled)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 138d80017f35..208b7d1d8a27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,14 +26,15 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
+#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
-#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4)
+#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
@@ -118,6 +119,8 @@ struct amdgpu_copy_mem {
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14
+#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1
#define AMDGPU_COPY_FLAGS_SET(field, value) \
(((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 4c7b53648a50..eaddc441c51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -30,6 +30,10 @@
#define AMDGPU_UCODE_NAME_MAX (128)
+static const struct kicker_device kicker_device_list[] = {
+ {0x744B, 0x00},
+};
+
static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr)
{
DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -765,8 +769,10 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+FW_VERSION_ATTR(dmcub_fw_version, 0444, dm.dmcub_fw_version);
FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK);
FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK);
+FW_VERSION_ATTR(pldm_fw_version, 0444, firmware.pldm_version);
static struct attribute *fw_attrs[] = {
&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
@@ -779,8 +785,9 @@ static struct attribute *fw_attrs[] = {
&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
- &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr,
- &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, &dev_attr_dmcub_fw_version.attr,
+ &dev_attr_imu_fw_version.attr, &dev_attr_mes_fw_version.attr,
+ &dev_attr_mes_kiq_fw_version.attr, &dev_attr_pldm_fw_version.attr,
NULL
};
@@ -1216,6 +1223,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
case IP_VERSION(11, 0, 13):
return "beige_goby";
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
return "vangogh";
case IP_VERSION(12, 0, 1):
return "green_sardine";
@@ -1383,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl
return NULL;
}
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+ if (adev->pdev->device == kicker_device_list[i].device &&
+ adev->pdev->revision == kicker_device_list[i].revision)
+ return true;
+ }
+
+ return false;
+}
+
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
{
int maj, min, rev;
@@ -1434,6 +1455,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
*
* @adev: amdgpu device
* @fw: pointer to load firmware to
+ * @required: whether the firmware is required
* @fmt: firmware name format string
* @...: variable arguments
*
@@ -1442,7 +1464,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type,
* the error code to -ENODEV, so that early_init functions will fail to load.
*/
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fmt, ...)
+ enum amdgpu_ucode_required required, const char *fmt, ...)
{
char fname[AMDGPU_UCODE_NAME_MAX];
va_list ap;
@@ -1456,16 +1478,24 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
return -EOVERFLOW;
}
- r = request_firmware(fw, fname, adev->dev);
+ if (required == AMDGPU_UCODE_REQUIRED)
+ r = request_firmware(fw, fname, adev->dev);
+ else {
+ r = firmware_request_nowarn(fw, fname, adev->dev);
+ if (r)
+ drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname);
+ }
if (r)
return -ENODEV;
r = amdgpu_ucode_validate(*fw);
- if (r) {
+ if (r)
+ /*
+ * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release()
+ * regardless of success/failure, and the amdgpu_ucode_release() takes care of
+ * firmware release and need to avoid redundant release FW operation here.
+ */
dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname);
- release_firmware(*fw);
- *fw = NULL;
- }
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 4e23419b92d4..6349aad6da35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -25,6 +25,8 @@
#include "amdgpu_socbb.h"
+#define RS64_FW_UC_START_ADDR_LO 0x3000
+
struct common_firmware_header {
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
uint32_t header_size_bytes; /* size of just the header in bytes */
@@ -126,6 +128,7 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_DBG_DRV,
PSP_FW_TYPE_PSP_RAS_DRV,
PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
+ PSP_FW_TYPE_PSP_SPDM_DRV,
PSP_FW_TYPE_MAX_INDEX,
};
@@ -163,6 +166,7 @@ enum ta_fw_type {
TA_FW_TYPE_PSP_DTM,
TA_FW_TYPE_PSP_RAP,
TA_FW_TYPE_PSP_SECUREDISPLAY,
+ TA_FW_TYPE_PSP_XGMI_AUX,
TA_FW_TYPE_MAX_INDEX,
};
@@ -550,6 +554,11 @@ enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
+enum amdgpu_ucode_required {
+ AMDGPU_UCODE_OPTIONAL,
+ AMDGPU_UCODE_REQUIRED,
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -593,6 +602,12 @@ struct amdgpu_firmware {
void *fw_buf_ptr;
uint64_t fw_buf_mc;
+ uint32_t pldm_version;
+};
+
+struct kicker_device{
+ unsigned short device;
+ u8 revision;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
@@ -603,9 +618,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
-__printf(3, 4)
+__printf(4, 5)
int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw,
- const char *fmt, ...);
+ enum amdgpu_ucode_required required, const char *fmt, ...);
void amdgpu_ucode_release(const struct firmware **fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
@@ -622,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index bb7b9b2eaac1..c92b8794aa73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
- err_data.err_addr_cnt);
+ err_data.err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, NULL);
}
@@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
- err_data->err_addr_cnt);
+ err_data->err_addr_cnt, false);
amdgpu_ras_save_bad_pages(adev, &err_count);
- amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev,
+ con->eeprom_control.ras_num_bad_pages);
if (con->update_channel_flag == true) {
amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
@@ -318,6 +319,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
if (r)
return r;
+ if (amdgpu_sriov_vf(adev))
+ return r;
+
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
if (r)
@@ -383,6 +387,45 @@ int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
return 0;
}
+static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func,
+ void *data)
+{
+ uint32_t umc_node_inst;
+ uint32_t node_inst;
+ uint32_t umc_inst;
+ uint32_t ch_inst;
+ int ret;
+
+ /*
+ * This loop is done based on the following -
+ * umc.active mask = mask of active umc instances across all nodes
+ * umc.umc_inst_num = maximum number of umc instancess per node
+ * umc.node_inst_num = maximum number of node instances
+ * Channel instances are not assumed to be harvested.
+ */
+ dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d",
+ adev->umc.active_mask, adev->umc.umc_inst_num);
+ for_each_set_bit(umc_node_inst, &(adev->umc.active_mask),
+ adev->umc.node_inst_num * adev->umc.umc_inst_num) {
+ node_inst = umc_node_inst / adev->umc.umc_inst_num;
+ umc_inst = umc_node_inst % adev->umc.umc_inst_num;
+ LOOP_UMC_CH_INST(ch_inst) {
+ dev_dbg(adev->dev,
+ "node_inst :%d umc_inst: %d ch_inst: %d",
+ node_inst, umc_inst, ch_inst);
+ ret = func(adev, node_inst, umc_inst, ch_inst, data);
+ if (ret) {
+ dev_err(adev->dev,
+ "Node %d umc %d ch %d func returns %d\n",
+ node_inst, umc_inst, ch_inst, ret);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
umc_func func, void *data)
{
@@ -391,6 +434,9 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
uint32_t ch_inst = 0;
int ret = 0;
+ if (adev->aid_mask)
+ return amdgpu_umc_loop_all_aid(adev, func, data);
+
if (adev->umc.node_inst_num) {
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
ret = func(adev, node_inst, umc_inst, ch_inst, data);
@@ -441,3 +487,101 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
return ret;
}
+
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr)
+{
+ struct ta_ras_query_address_output addr_out;
+
+ /* reinit err_data */
+ err_data->err_addr_cnt = 0;
+ err_data->err_addr_len = adev->umc.retire_unit;
+
+ addr_out.pa.pa = pa_addr;
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
+ return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+ &addr_out, false);
+ else
+ return -EINVAL;
+}
+
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len)
+{
+ int i, ret;
+ struct ras_err_data err_data;
+
+ err_data.err_addr = kcalloc(adev->umc.retire_unit,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if (!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+ return 0;
+ }
+
+ ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < adev->umc.retire_unit; i++) {
+ if (i >= len)
+ goto out;
+
+ pfns[i] = err_data.err_addr[i].retired_page;
+ }
+ ret = i;
+ adev->umc.err_addr_cnt = err_data.err_addr_cnt;
+
+out:
+ kfree(err_data.err_addr);
+ return ret;
+}
+
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+ struct ta_ras_query_address_input addr_in;
+ int ret;
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch;
+ addr_in.ma.umc_inst = umc;
+ addr_in.ma.node_inst = node;
+ addr_in.ma.socket_id = socket;
+
+ if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+ ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+ addr_out, dump_addr);
+ if (ret)
+ return ret;
+ } else {
+ return 0;
+ }
+
+ return 0;
+}
+
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps)
+{
+ struct ta_ras_query_address_input addr_in;
+ struct ta_ras_query_address_output addr_out;
+ int ret;
+
+ /* nps: the pa belongs to */
+ addr_in.pa.pa = pa | ((uint64_t)nps << 58);
+ addr_in.addr_type = TA_RAS_PA_TO_MCA;
+ ret = psp_ras_query_address(&adev->psp, &addr_in, &addr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS MCA address for 0x%llx",
+ pa);
+
+ return ret;
+ }
+
+ *mca = addr_out.ma.err_addr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index ce4179db2a6d..ec203f9e5ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -54,6 +54,41 @@
/* Page retirement tag */
#define UMC_ECC_NEW_DETECTED_TAG 0x1
+/*
+ * a flag to indicate v2 of channel index stored in eeprom
+ *
+ * v1 (legacy way): store channel index within a umc instance in eeprom
+ * range in UMC v12: 0 ~ 7
+ * v2: store global channel index in eeprom
+ * range in UMC v12: 0 ~ 127
+ *
+ * NOTE: it's better to store it in eeprom_table_record.mem_channel,
+ * but there is only 8 bits in mem_channel, and the channel number may
+ * increase in the future, we decide to save it in
+ * eeprom_table_record.retired_page. retired_page is useless in v2,
+ * we depend on eeprom_table_record.address instead of retired_page in v2.
+ * Only 48 bits are saved on eeprom, use bit 47 here.
+ */
+#define UMC_CHANNEL_IDX_V2 BIT_ULL(47)
+
+/*
+ * save nps value to eeprom_table_record.retired_page[47:40],
+ * the channel index flag above will be retired.
+ */
+#define UMC_NPS_SHIFT 40
+#define UMC_NPS_MASK 0xffULL
+
+/* three column bits and one row bit in MCA address flip
+ * in bad page retirement
+ */
+#define RETIRE_FLIP_BITS_NUM 4
+
+struct amdgpu_umc_flip_bits {
+ uint32_t flip_bits_in_pa[RETIRE_FLIP_BITS_NUM];
+ uint32_t flip_row_bit;
+ uint32_t r13_in_pa;
+ uint32_t bit_num;
+};
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -70,6 +105,14 @@ struct amdgpu_umc_ras {
enum amdgpu_mca_error_type type, void *ras_error_status);
int (*update_ecc_status)(struct amdgpu_device *adev,
uint64_t status, uint64_t ipid, uint64_t addr);
+ int (*convert_ras_err_addr)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr);
+ uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page);
+ void (*get_retire_flip_bits)(struct amdgpu_device *adev);
};
struct amdgpu_umc_funcs {
@@ -100,6 +143,10 @@ struct amdgpu_umc {
/* active mask for umc node instance */
unsigned long active_mask;
+
+ struct amdgpu_umc_flip_bits flip_bits;
+
+ unsigned long err_addr_cnt;
};
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
@@ -134,4 +181,14 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
void *ras_error_status);
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint64_t pa_addr);
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+ uint64_t pa_addr, uint64_t *pfns, int len);
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+ uint64_t err_addr, uint32_t ch, uint32_t umc,
+ uint32_t node, uint32_t socket,
+ struct ta_ras_query_address_output *addr_out, bool dump_addr);
+int amdgpu_umc_pa2mca(struct amdgpu_device *adev,
+ uint64_t pa, uint64_t *mca, enum amdgpu_memory_partition nps);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index 6162582d0aa2..cd707d70a0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -32,462 +32,7 @@
#include "amdgpu_umsch_mm.h"
#include "umsch_mm_v4_0.h"
-struct umsch_mm_test_ctx_data {
- uint8_t process_csa[PAGE_SIZE];
- uint8_t vpe_ctx_csa[PAGE_SIZE];
- uint8_t vcn_ctx_csa[PAGE_SIZE];
-};
-
-struct umsch_mm_test_mqd_data {
- uint8_t vpe_mqd[PAGE_SIZE];
- uint8_t vcn_mqd[PAGE_SIZE];
-};
-
-struct umsch_mm_test_ring_data {
- uint8_t vpe_ring[PAGE_SIZE];
- uint8_t vpe_ib[PAGE_SIZE];
- uint8_t vcn_ring[PAGE_SIZE];
- uint8_t vcn_ib[PAGE_SIZE];
-};
-
-struct umsch_mm_test_queue_info {
- uint64_t mqd_addr;
- uint64_t csa_addr;
- uint32_t doorbell_offset_0;
- uint32_t doorbell_offset_1;
- enum UMSCH_SWIP_ENGINE_TYPE engine;
-};
-
-struct umsch_mm_test {
- struct amdgpu_bo *ctx_data_obj;
- uint64_t ctx_data_gpu_addr;
- uint32_t *ctx_data_cpu_addr;
-
- struct amdgpu_bo *mqd_data_obj;
- uint64_t mqd_data_gpu_addr;
- uint32_t *mqd_data_cpu_addr;
-
- struct amdgpu_bo *ring_data_obj;
- uint64_t ring_data_gpu_addr;
- uint32_t *ring_data_cpu_addr;
-
-
- struct amdgpu_vm *vm;
- struct amdgpu_bo_va *bo_va;
- uint32_t pasid;
- uint32_t vm_cntx_cntl;
- uint32_t num_queues;
-};
-
-static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va,
- uint64_t addr, uint32_t size)
-{
- struct amdgpu_sync sync;
- struct drm_exec exec;
- int r;
-
- amdgpu_sync_create(&sync);
-
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec, &bo->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto error_fini_exec;
- }
-
- *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
- if (!*bo_va) {
- r = -ENOMEM;
- goto error_fini_exec;
- }
-
- r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size,
- AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
- AMDGPU_PTE_EXECUTABLE);
-
- if (r)
- goto error_del_bo_va;
-
-
- r = amdgpu_vm_bo_update(adev, *bo_va, false);
- if (r)
- goto error_del_bo_va;
-
- amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update);
-
- r = amdgpu_vm_update_pdes(adev, vm, false);
- if (r)
- goto error_del_bo_va;
-
- amdgpu_sync_fence(&sync, vm->last_update);
-
- amdgpu_sync_wait(&sync, false);
- drm_exec_fini(&exec);
-
- amdgpu_sync_free(&sync);
-
- return 0;
-
-error_del_bo_va:
- amdgpu_vm_bo_del(adev, *bo_va);
- amdgpu_sync_free(&sync);
-
-error_fini_exec:
- drm_exec_fini(&exec);
- amdgpu_sync_free(&sync);
- return r;
-}
-
-static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va,
- uint64_t addr)
-{
- struct drm_exec exec;
- long r;
-
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- r = drm_exec_lock_obj(&exec, &bo->tbo.base);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
-
- r = amdgpu_vm_lock_pd(vm, &exec, 0);
- drm_exec_retry_on_contention(&exec);
- if (unlikely(r))
- goto out_unlock;
- }
-
-
- r = amdgpu_vm_bo_unmap(adev, bo_va, addr);
- if (r)
- goto out_unlock;
-
- amdgpu_vm_bo_del(adev, bo_va);
-
-out_unlock:
- drm_exec_fini(&exec);
-
- return r;
-}
-
-static void setup_vpe_queue(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr;
- uint64_t ring_gpu_addr = test->ring_data_gpu_addr;
-
- mqd->rb_base_lo = (ring_gpu_addr >> 8);
- mqd->rb_base_hi = (ring_gpu_addr >> 40);
- mqd->rb_size = PAGE_SIZE / 4;
- mqd->wptr_val = 0;
- mqd->rptr_val = 0;
- mqd->unmapped = 1;
-
- if (adev->vpe.collaborate_mode)
- memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO));
-
- qinfo->mqd_addr = test->mqd_data_gpu_addr;
- qinfo->csa_addr = test->ctx_data_gpu_addr +
- offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa);
- qinfo->doorbell_offset_0 = 0;
- qinfo->doorbell_offset_1 = 0;
-}
-
-static void setup_vcn_queue(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
-}
-
-static int add_test_queue(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- struct umsch_mm_add_queue_input queue_input = {};
- int r;
-
- queue_input.process_id = test->pasid;
- queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(test->vm->root.bo);
-
- queue_input.process_va_start = 0;
- queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
-
- queue_input.process_quantum = 100000; /* 10ms */
- queue_input.process_csa_addr = test->ctx_data_gpu_addr +
- offsetof(struct umsch_mm_test_ctx_data, process_csa);
-
- queue_input.context_quantum = 10000; /* 1ms */
- queue_input.context_csa_addr = qinfo->csa_addr;
-
- queue_input.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL;
- queue_input.context_global_priority_level = CONTEXT_PRIORITY_LEVEL_NORMAL;
- queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0;
- queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1;
-
- queue_input.engine_type = qinfo->engine;
- queue_input.mqd_addr = qinfo->mqd_addr;
- queue_input.vm_context_cntl = test->vm_cntx_cntl;
-
- amdgpu_umsch_mm_lock(&adev->umsch_mm);
- r = adev->umsch_mm.funcs->add_queue(&adev->umsch_mm, &queue_input);
- amdgpu_umsch_mm_unlock(&adev->umsch_mm);
- if (r)
- return r;
-
- return 0;
-}
-
-static int remove_test_queue(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- struct umsch_mm_remove_queue_input queue_input = {};
- int r;
-
- queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0;
- queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1;
- queue_input.context_csa_addr = qinfo->csa_addr;
-
- amdgpu_umsch_mm_lock(&adev->umsch_mm);
- r = adev->umsch_mm.funcs->remove_queue(&adev->umsch_mm, &queue_input);
- amdgpu_umsch_mm_unlock(&adev->umsch_mm);
- if (r)
- return r;
-
- return 0;
-}
-
-static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *test)
-{
- struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr;
- uint32_t *ring = test->ring_data_cpu_addr +
- offsetof(struct umsch_mm_test_ring_data, vpe_ring) / 4;
- uint32_t *ib = test->ring_data_cpu_addr +
- offsetof(struct umsch_mm_test_ring_data, vpe_ib) / 4;
- uint64_t ib_gpu_addr = test->ring_data_gpu_addr +
- offsetof(struct umsch_mm_test_ring_data, vpe_ib);
- uint32_t *fence = ib + 2048 / 4;
- uint64_t fence_gpu_addr = ib_gpu_addr + 2048;
- const uint32_t test_pattern = 0xdeadbeef;
- int i;
-
- ib[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
- ib[1] = lower_32_bits(fence_gpu_addr);
- ib[2] = upper_32_bits(fence_gpu_addr);
- ib[3] = test_pattern;
-
- ring[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0);
- ring[1] = (ib_gpu_addr & 0xffffffe0);
- ring[2] = upper_32_bits(ib_gpu_addr);
- ring[3] = 4;
- ring[4] = 0;
- ring[5] = 0;
-
- mqd->wptr_val = (6 << 2);
- if (adev->vpe.collaborate_mode)
- (++mqd)->wptr_val = (6 << 2);
-
- WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- if (*fence == test_pattern)
- return 0;
- udelay(1);
- }
-
- dev_err(adev->dev, "vpe queue submission timeout\n");
-
- return -ETIMEDOUT;
-}
-
-static int submit_vcn_queue(struct amdgpu_device *adev, struct umsch_mm_test *test)
-{
- return 0;
-}
-
-static int setup_umsch_mm_test(struct amdgpu_device *adev,
- struct umsch_mm_test *test)
-{
- struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
- int r;
-
- test->vm_cntx_cntl = hub->vm_cntx_cntl;
-
- test->vm = kzalloc(sizeof(*test->vm), GFP_KERNEL);
- if (!test->vm) {
- r = -ENOMEM;
- return r;
- }
-
- r = amdgpu_vm_init(adev, test->vm, -1);
- if (r)
- goto error_free_vm;
-
- r = amdgpu_pasid_alloc(16);
- if (r < 0)
- goto error_fini_vm;
- test->pasid = r;
-
- r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ctx_data),
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &test->ctx_data_obj,
- &test->ctx_data_gpu_addr,
- (void **)&test->ctx_data_cpu_addr);
- if (r)
- goto error_free_pasid;
-
- memset(test->ctx_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ctx_data));
-
- r = amdgpu_bo_create_kernel(adev, PAGE_SIZE,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &test->mqd_data_obj,
- &test->mqd_data_gpu_addr,
- (void **)&test->mqd_data_cpu_addr);
- if (r)
- goto error_free_ctx_data_obj;
-
- memset(test->mqd_data_cpu_addr, 0, PAGE_SIZE);
-
- r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ring_data),
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &test->ring_data_obj,
- NULL,
- (void **)&test->ring_data_cpu_addr);
- if (r)
- goto error_free_mqd_data_obj;
-
- memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data));
-
- test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM;
- r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va,
- test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data));
- if (r)
- goto error_free_ring_data_obj;
-
- return 0;
-
-error_free_ring_data_obj:
- amdgpu_bo_free_kernel(&test->ring_data_obj, NULL,
- (void **)&test->ring_data_cpu_addr);
-error_free_mqd_data_obj:
- amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr,
- (void **)&test->mqd_data_cpu_addr);
-error_free_ctx_data_obj:
- amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr,
- (void **)&test->ctx_data_cpu_addr);
-error_free_pasid:
- amdgpu_pasid_free(test->pasid);
-error_fini_vm:
- amdgpu_vm_fini(adev, test->vm);
-error_free_vm:
- kfree(test->vm);
-
- return r;
-}
-
-static void cleanup_umsch_mm_test(struct amdgpu_device *adev,
- struct umsch_mm_test *test)
-{
- unmap_ring_data(adev, test->vm, test->ring_data_obj,
- test->bo_va, test->ring_data_gpu_addr);
- amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr,
- (void **)&test->mqd_data_cpu_addr);
- amdgpu_bo_free_kernel(&test->ring_data_obj, NULL,
- (void **)&test->ring_data_cpu_addr);
- amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr,
- (void **)&test->ctx_data_cpu_addr);
- amdgpu_pasid_free(test->pasid);
- amdgpu_vm_fini(adev, test->vm);
- kfree(test->vm);
-}
-
-static int setup_test_queues(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- int i, r;
-
- for (i = 0; i < test->num_queues; i++) {
- if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE)
- setup_vpe_queue(adev, test, &qinfo[i]);
- else
- setup_vcn_queue(adev, test, &qinfo[i]);
-
- r = add_test_queue(adev, test, &qinfo[i]);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-static int submit_test_queues(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- int i, r;
-
- for (i = 0; i < test->num_queues; i++) {
- if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE)
- r = submit_vpe_queue(adev, test);
- else
- r = submit_vcn_queue(adev, test);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-static void cleanup_test_queues(struct amdgpu_device *adev,
- struct umsch_mm_test *test,
- struct umsch_mm_test_queue_info *qinfo)
-{
- int i;
-
- for (i = 0; i < test->num_queues; i++)
- remove_test_queue(adev, test, &qinfo[i]);
-}
-
-static int umsch_mm_test(struct amdgpu_device *adev)
-{
- struct umsch_mm_test_queue_info qinfo[] = {
- { .engine = UMSCH_SWIP_ENGINE_TYPE_VPE },
- };
- struct umsch_mm_test test = { .num_queues = ARRAY_SIZE(qinfo) };
- int r;
-
- r = setup_umsch_mm_test(adev, &test);
- if (r)
- return r;
-
- r = setup_test_queues(adev, &test, qinfo);
- if (r)
- goto cleanup;
-
- r = submit_test_queues(adev, &test, qinfo);
- if (r)
- goto cleanup;
-
- cleanup_test_queues(adev, &test, qinfo);
- cleanup_umsch_mm_test(adev, &test);
-
- return 0;
-
-cleanup:
- cleanup_test_queues(adev, &test, qinfo);
- cleanup_umsch_mm_test(adev, &test);
- return r;
-}
+MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin");
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws)
{
@@ -581,13 +126,14 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
case IP_VERSION(4, 0, 6):
- fw_name = "amdgpu/umsch_mm_4_0_0.bin";
+ fw_name = "4_0_0";
break;
default:
- break;
+ return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/umsch_mm_%s.bin", fw_name);
if (r) {
release_firmware(adev->umsch_mm.fw);
adev->umsch_mm.fw = NULL;
@@ -765,9 +311,9 @@ static int umsch_mm_init(struct amdgpu_device *adev)
}
-static int umsch_mm_early_init(void *handle)
+static int umsch_mm_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
@@ -784,19 +330,19 @@ static int umsch_mm_early_init(void *handle)
return 0;
}
-static int umsch_mm_late_init(void *handle)
+static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
return 0;
- return umsch_mm_test(adev);
+ return 0;
}
-static int umsch_mm_sw_init(void *handle)
+static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = umsch_mm_init(adev);
@@ -815,9 +361,9 @@ static int umsch_mm_sw_init(void *handle)
return 0;
}
-static int umsch_mm_sw_fini(void *handle)
+static int umsch_mm_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
release_firmware(adev->umsch_mm.fw);
adev->umsch_mm.fw = NULL;
@@ -839,9 +385,9 @@ static int umsch_mm_sw_fini(void *handle)
return 0;
}
-static int umsch_mm_hw_init(void *handle)
+static int umsch_mm_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = umsch_mm_load_microcode(&adev->umsch_mm);
@@ -857,9 +403,9 @@ static int umsch_mm_hw_init(void *handle)
return 0;
}
-static int umsch_mm_hw_fini(void *handle)
+static int umsch_mm_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
umsch_mm_ring_stop(&adev->umsch_mm);
@@ -873,18 +419,14 @@ static int umsch_mm_hw_fini(void *handle)
return 0;
}
-static int umsch_mm_suspend(void *handle)
+static int umsch_mm_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return umsch_mm_hw_fini(adev);
+ return umsch_mm_hw_fini(ip_block);
}
-static int umsch_mm_resume(void *handle)
+static int umsch_mm_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return umsch_mm_hw_init(adev);
+ return umsch_mm_hw_init(ip_block);
}
void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
@@ -997,8 +539,6 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
.hw_fini = umsch_mm_hw_fini,
.suspend = umsch_mm_suspend,
.resume = umsch_mm_resume,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
new file mode 100644
index 000000000000..295e7186e156
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -0,0 +1,924 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drm_auth.h>
+#include <drm/drm_exec.h>
+#include <linux/pm_runtime.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_userq.h"
+#include "amdgpu_userq_fence.h"
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
+{
+ int i;
+ u32 userq_ip_mask = 0;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
+ if (adev->userq_funcs[i])
+ userq_ip_mask |= (1 << i);
+ }
+
+ return userq_ip_mask;
+}
+
+static int
+amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
+ r = userq_funcs->unmap(uq_mgr, queue);
+ if (r)
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ else
+ queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
+ }
+ return r;
+}
+
+static int
+amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *userq_funcs =
+ adev->userq_funcs[queue->queue_type];
+ int r = 0;
+
+ if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
+ r = userq_funcs->map(uq_mgr, queue);
+ if (r) {
+ queue->state = AMDGPU_USERQ_STATE_HUNG;
+ } else {
+ queue->state = AMDGPU_USERQ_STATE_MAPPED;
+ }
+ }
+ return r;
+}
+
+static void
+amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct dma_fence *f = queue->last_fence;
+ int ret;
+
+ if (f && !dma_fence_is_signaled(f)) {
+ ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+ if (ret <= 0)
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ }
+}
+
+static void
+amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ int queue_id)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ amdgpu_userq_fence_driver_free(queue);
+ idr_remove(&uq_mgr->userq_idr, queue_id);
+ kfree(queue);
+}
+
+int
+amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ int queue_id;
+ int ret = 0;
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ /* Resume all the queues for this process */
+ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
+ ret += queue->state == AMDGPU_USERQ_STATE_MAPPED;
+
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return ret;
+}
+
+static struct amdgpu_usermode_queue *
+amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
+{
+ return idr_find(&uq_mgr->userq_idr, qid);
+}
+
+void
+amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+ struct amdgpu_eviction_fence *ev_fence;
+
+retry:
+ /* Flush any pending resume work to create ev_fence */
+ flush_delayed_work(&uq_mgr->resume_work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+ spin_lock(&evf_mgr->ev_fence_lock);
+ ev_fence = evf_mgr->ev_fence;
+ spin_unlock(&evf_mgr->ev_fence_lock);
+ if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+ mutex_unlock(&uq_mgr->userq_mutex);
+ /*
+ * Looks like there was no pending resume work,
+ * add one now to create a valid eviction fence
+ */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+ goto retry;
+ }
+}
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ bp.type = ttm_bo_type_kernel;
+ bp.size = size;
+ bp.resv = NULL;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
+ return r;
+ }
+
+ r = amdgpu_bo_reserve(userq_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
+ goto free_obj;
+ }
+
+ r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
+ goto unresv;
+ }
+
+ r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
+ goto unresv;
+ }
+
+ userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
+ amdgpu_bo_unreserve(userq_obj->obj);
+ memset(userq_obj->cpu_ptr, 0, size);
+ return 0;
+
+unresv:
+ amdgpu_bo_unreserve(userq_obj->obj);
+
+free_obj:
+ amdgpu_bo_unref(&userq_obj->obj);
+ return r;
+}
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj)
+{
+ amdgpu_bo_kunmap(userq_obj->obj);
+ amdgpu_bo_unref(&userq_obj->obj);
+}
+
+uint64_t
+amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp)
+{
+ uint64_t index;
+ struct drm_gem_object *gobj;
+ struct amdgpu_userq_obj *db_obj = db_info->db_obj;
+ int r, db_size;
+
+ gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
+ if (gobj == NULL) {
+ drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
+ return -EINVAL;
+ }
+
+ db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ drm_gem_object_put(gobj);
+
+ r = amdgpu_bo_reserve(db_obj->obj, true);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unref_bo;
+ }
+
+ /* Pin the BO before generating the index, unpin in queue destroy */
+ r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
+ if (r) {
+ drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
+ goto unresv_bo;
+ }
+
+ switch (db_info->queue_type) {
+ case AMDGPU_HW_IP_GFX:
+ case AMDGPU_HW_IP_COMPUTE:
+ case AMDGPU_HW_IP_DMA:
+ db_size = sizeof(u64);
+ break;
+
+ case AMDGPU_HW_IP_VCN_ENC:
+ db_size = sizeof(u32);
+ db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
+ break;
+
+ case AMDGPU_HW_IP_VPE:
+ db_size = sizeof(u32);
+ db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
+ break;
+
+ default:
+ drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n",
+ db_info->queue_type);
+ r = -EINVAL;
+ goto unpin_bo;
+ }
+
+ index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
+ db_info->doorbell_offset, db_size);
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev),
+ "[Usermode queues] doorbell index=%lld\n", index);
+ amdgpu_bo_unreserve(db_obj->obj);
+ return index;
+
+unpin_bo:
+ amdgpu_bo_unpin(db_obj->obj);
+unresv_bo:
+ amdgpu_bo_unreserve(db_obj->obj);
+unref_bo:
+ amdgpu_bo_unref(&db_obj->obj);
+ return r;
+}
+
+static int
+amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_usermode_queue *queue;
+ int r = 0;
+
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ queue = amdgpu_userq_find(uq_mgr, queue_id);
+ if (!queue) {
+ drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
+ mutex_unlock(&uq_mgr->userq_mutex);
+ return -EINVAL;
+ }
+ amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
+ r = amdgpu_bo_reserve(queue->db_obj.obj, true);
+ if (!r) {
+ amdgpu_bo_unpin(queue->db_obj.obj);
+ amdgpu_bo_unreserve(queue->db_obj.obj);
+ }
+ amdgpu_bo_unref(&queue->db_obj.obj);
+ r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
+ mutex_unlock(&uq_mgr->userq_mutex);
+
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static int amdgpu_userq_priority_permit(struct drm_file *filp,
+ int priority)
+{
+ if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
+ return 0;
+
+ if (capable(CAP_SYS_NICE))
+ return 0;
+
+ if (drm_is_current_master(filp))
+ return 0;
+
+ return -EACCES;
+}
+
+static int
+amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ const struct amdgpu_userq_funcs *uq_funcs;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_db_info db_info;
+ bool skip_map_queue;
+ uint64_t index;
+ int qid, r = 0;
+ int priority =
+ (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;
+
+ /* Usermode queues are only supported for GFX IP as of now */
+ if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
+ args->in.ip_type != AMDGPU_HW_IP_DMA &&
+ args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
+ drm_file_err(uq_mgr->file, "Usermode queue doesn't support IP type %u\n",
+ args->in.ip_type);
+ return -EINVAL;
+ }
+
+ r = amdgpu_userq_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
+ (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
+ (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
+ !amdgpu_is_tmz(adev)) {
+ drm_file_err(uq_mgr->file, "Secure only supported on GFX/Compute queues\n");
+ return -EINVAL;
+ }
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ /*
+ * There could be a situation that we are creating a new queue while
+ * the other queues under this UQ_mgr are suspended. So if there is any
+ * resume work pending, wait for it to get done.
+ *
+ * This will also make sure we have a valid eviction fence ready to be used.
+ */
+ mutex_lock(&adev->userq_mutex);
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ uq_funcs = adev->userq_funcs[args->in.ip_type];
+ if (!uq_funcs) {
+ drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
+ args->in.ip_type);
+ r = -EINVAL;
+ goto unlock;
+ }
+
+ queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
+ if (!queue) {
+ drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
+ r = -ENOMEM;
+ goto unlock;
+ }
+ queue->doorbell_handle = args->in.doorbell_handle;
+ queue->queue_type = args->in.ip_type;
+ queue->vm = &fpriv->vm;
+ queue->priority = priority;
+
+ db_info.queue_type = queue->queue_type;
+ db_info.doorbell_handle = queue->doorbell_handle;
+ db_info.db_obj = &queue->db_obj;
+ db_info.doorbell_offset = args->in.doorbell_offset;
+
+ /* Convert relative doorbell offset into absolute doorbell index */
+ index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
+ if (index == (uint64_t)-EINVAL) {
+ drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
+ kfree(queue);
+ goto unlock;
+ }
+
+ queue->doorbell_index = index;
+ xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
+ r = amdgpu_userq_fence_driver_alloc(adev, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
+ goto unlock;
+ }
+
+ r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to create Queue\n");
+ amdgpu_userq_fence_driver_free(queue);
+ kfree(queue);
+ goto unlock;
+ }
+
+
+ qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
+ if (qid < 0) {
+ drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ r = -ENOMEM;
+ goto unlock;
+ }
+
+ /* don't map the queue if scheduling is halted */
+ if (adev->userq_halt_for_enforce_isolation &&
+ ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
+ skip_map_queue = true;
+ else
+ skip_map_queue = false;
+ if (!skip_map_queue) {
+ r = amdgpu_userq_map_helper(uq_mgr, queue);
+ if (r) {
+ drm_file_err(uq_mgr->file, "Failed to map Queue\n");
+ idr_remove(&uq_mgr->userq_idr, qid);
+ amdgpu_userq_fence_driver_free(queue);
+ uq_funcs->mqd_destroy(uq_mgr, queue);
+ kfree(queue);
+ goto unlock;
+ }
+ }
+
+
+ args->out.queue_id = qid;
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+ mutex_unlock(&adev->userq_mutex);
+
+ return r;
+}
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ union drm_amdgpu_userq *args = data;
+ int r;
+
+ switch (args->in.op) {
+ case AMDGPU_USERQ_OP_CREATE:
+ if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
+ AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
+ return -EINVAL;
+ r = amdgpu_userq_create(filp, args);
+ if (r)
+ drm_file_err(filp, "Failed to create usermode queue\n");
+ break;
+
+ case AMDGPU_USERQ_OP_FREE:
+ if (args->in.ip_type ||
+ args->in.doorbell_handle ||
+ args->in.doorbell_offset ||
+ args->in.flags ||
+ args->in.queue_va ||
+ args->in.queue_size ||
+ args->in.rptr_va ||
+ args->in.wptr_va ||
+ args->in.wptr_va ||
+ args->in.mqd ||
+ args->in.mqd_size)
+ return -EINVAL;
+ r = amdgpu_userq_destroy(filp, args->in.queue_id);
+ if (r)
+ drm_file_err(filp, "Failed to destroy usermode queue\n");
+ break;
+
+ default:
+ drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static int
+amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ int queue_id;
+ int ret = 0, r;
+
+ /* Resume all the queues for this process */
+ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ r = amdgpu_userq_map_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
+ return ret;
+}
+
+static int
+amdgpu_userq_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ int ret;
+
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ DRM_ERROR("Fail to validate\n");
+
+ return ret;
+}
+
+static int
+amdgpu_userq_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_bo_va *bo_va;
+ struct ww_acquire_ctx *ticket;
+ struct drm_exec exec;
+ struct amdgpu_bo *bo;
+ struct dma_resv *resv;
+ bool clear, unlock;
+ int ret = 0;
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ ret = amdgpu_vm_lock_pd(vm, &exec, 2);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret)) {
+ drm_file_err(uq_mgr->file, "Failed to lock PD\n");
+ goto unlock_all;
+ }
+
+ /* Lock the done list */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+ if (!bo)
+ continue;
+
+ ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(ret))
+ goto unlock_all;
+ }
+ }
+
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
+ /* Per VM BOs never need to bo cleared in the page tables */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret)
+ goto unlock_all;
+ spin_lock(&vm->status_lock);
+ }
+
+ ticket = &exec.ticket;
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+ base.vm_status);
+ resv = bo_va->base.bo->tbo.base.resv;
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_va->base.bo;
+ ret = amdgpu_userq_validate_vm_bo(NULL, bo);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to validate BO\n");
+ goto unlock_all;
+ }
+
+ /* Try to reserve the BO to avoid clearing its ptes */
+ if (!adev->debug_vm && dma_resv_trylock(resv)) {
+ clear = false;
+ unlock = true;
+ /* The caller is already holding the reservation lock */
+ } else if (dma_resv_locking_ctx(resv) == ticket) {
+ clear = false;
+ unlock = false;
+ /* Somebody else is using the BO right now */
+ } else {
+ clear = true;
+ unlock = false;
+ }
+
+ ret = amdgpu_vm_bo_update(adev, bo_va, clear);
+
+ if (unlock)
+ dma_resv_unlock(resv);
+ if (ret)
+ goto unlock_all;
+
+ spin_lock(&vm->status_lock);
+ }
+ spin_unlock(&vm->status_lock);
+
+ ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+ if (ret)
+ drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");
+
+unlock_all:
+ drm_exec_fini(&exec);
+ return ret;
+}
+
+static void amdgpu_userq_restore_worker(struct work_struct *work)
+{
+ struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ int ret;
+
+ flush_work(&fpriv->evf_mgr.suspend_work.work);
+
+ mutex_lock(&uq_mgr->userq_mutex);
+
+ ret = amdgpu_userq_validate_bos(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
+ goto unlock;
+ }
+
+ ret = amdgpu_userq_restore_all(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
+ goto unlock;
+ }
+
+unlock:
+ mutex_unlock(&uq_mgr->userq_mutex);
+}
+
+static int
+amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ int queue_id;
+ int ret = 0, r;
+
+ /* Try to unmap all the queues in this process ctx */
+ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ r = amdgpu_userq_unmap_helper(uq_mgr, queue);
+ if (r)
+ ret = r;
+ }
+
+ if (ret)
+ drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
+ return ret;
+}
+
+static int
+amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+ struct amdgpu_usermode_queue *queue;
+ int queue_id, ret;
+
+ idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+ struct dma_fence *f = queue->last_fence;
+
+ if (!f || dma_fence_is_signaled(f))
+ continue;
+ ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+ if (ret <= 0) {
+ drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
+ f->context, f->seqno);
+ return -ETIMEDOUT;
+ }
+ }
+
+ return 0;
+}
+
+void
+amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence)
+{
+ int ret;
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+ struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+
+ /* Wait for any pending userqueue fence work to finish */
+ ret = amdgpu_userq_wait_for_signal(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
+ return;
+ }
+
+ ret = amdgpu_userq_evict_all(uq_mgr);
+ if (ret) {
+ drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
+ return;
+ }
+
+ /* Signal current eviction fence */
+ amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
+
+ if (evf_mgr->fd_closing) {
+ cancel_delayed_work_sync(&uq_mgr->resume_work);
+ return;
+ }
+
+ /* Schedule a resume work */
+ schedule_delayed_work(&uq_mgr->resume_work, 0);
+}
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev)
+{
+ mutex_init(&userq_mgr->userq_mutex);
+ idr_init_base(&userq_mgr->userq_idr, 1);
+ userq_mgr->adev = adev;
+ userq_mgr->file = file_priv;
+
+ mutex_lock(&adev->userq_mutex);
+ list_add(&userq_mgr->list, &adev->userq_mgr_list);
+ mutex_unlock(&adev->userq_mutex);
+
+ INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
+ return 0;
+}
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
+{
+ struct amdgpu_device *adev = userq_mgr->adev;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ uint32_t queue_id;
+
+ cancel_delayed_work_sync(&userq_mgr->resume_work);
+
+ mutex_lock(&adev->userq_mutex);
+ mutex_lock(&userq_mgr->userq_mutex);
+ idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
+ amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
+ amdgpu_userq_unmap_helper(userq_mgr, queue);
+ amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
+ }
+
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ if (uqm == userq_mgr) {
+ list_del(&uqm->list);
+ break;
+ }
+ }
+ idr_destroy(&userq_mgr->userq_idr);
+ mutex_unlock(&userq_mgr->userq_mutex);
+ mutex_unlock(&adev->userq_mutex);
+ mutex_destroy(&userq_mgr->userq_mutex);
+}
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret = 0, r;
+
+ if (!ip_mask)
+ return 0;
+
+ mutex_lock(&adev->userq_mutex);
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ cancel_delayed_work_sync(&uqm->resume_work);
+ mutex_lock(&uqm->userq_mutex);
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+ mutex_unlock(&adev->userq_mutex);
+ return ret;
+}
+
+int amdgpu_userq_resume(struct amdgpu_device *adev)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret = 0, r;
+
+ if (!ip_mask)
+ return 0;
+
+ mutex_lock(&adev->userq_mutex);
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ mutex_lock(&uqm->userq_mutex);
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+ mutex_unlock(&adev->userq_mutex);
+ return ret;
+}
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ mutex_lock(&adev->userq_mutex);
+ if (adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already stopped!\n");
+ adev->userq_halt_for_enforce_isolation = true;
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ cancel_delayed_work_sync(&uqm->resume_work);
+ mutex_lock(&uqm->userq_mutex);
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_unmap_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+ mutex_unlock(&adev->userq_mutex);
+ return ret;
+}
+
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx)
+{
+ u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_mgr *uqm, *tmp;
+ int queue_id;
+ int ret = 0, r;
+
+ /* only need to stop gfx/compute */
+ if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
+ return 0;
+
+ mutex_lock(&adev->userq_mutex);
+ if (!adev->userq_halt_for_enforce_isolation)
+ dev_warn(adev->dev, "userq scheduling already started!\n");
+ adev->userq_halt_for_enforce_isolation = false;
+ list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
+ mutex_lock(&uqm->userq_mutex);
+ idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
+ if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
+ (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
+ (queue->xcp_id == idx)) {
+ r = amdgpu_userq_map_helper(uqm, queue);
+ if (r)
+ ret = r;
+ }
+ }
+ mutex_unlock(&uqm->userq_mutex);
+ }
+ mutex_unlock(&adev->userq_mutex);
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
new file mode 100644
index 000000000000..ec040c2fd6c9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_USERQ_H_
+#define AMDGPU_USERQ_H_
+#include "amdgpu_eviction_fence.h"
+
+#define AMDGPU_MAX_USERQ_COUNT 512
+
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
+
+enum amdgpu_userq_state {
+ AMDGPU_USERQ_STATE_UNMAPPED = 0,
+ AMDGPU_USERQ_STATE_MAPPED,
+ AMDGPU_USERQ_STATE_PREEMPTED,
+ AMDGPU_USERQ_STATE_HUNG,
+};
+
+struct amdgpu_mqd_prop;
+
+struct amdgpu_userq_obj {
+ void *cpu_ptr;
+ uint64_t gpu_addr;
+ struct amdgpu_bo *obj;
+};
+
+struct amdgpu_usermode_queue {
+ int queue_type;
+ enum amdgpu_userq_state state;
+ uint64_t doorbell_handle;
+ uint64_t doorbell_index;
+ uint64_t flags;
+ struct amdgpu_mqd_prop *userq_prop;
+ struct amdgpu_userq_mgr *userq_mgr;
+ struct amdgpu_vm *vm;
+ struct amdgpu_userq_obj mqd;
+ struct amdgpu_userq_obj db_obj;
+ struct amdgpu_userq_obj fw_obj;
+ struct amdgpu_userq_obj wptr_obj;
+ struct xarray fence_drv_xa;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *last_fence;
+ u32 xcp_id;
+ int priority;
+};
+
+struct amdgpu_userq_funcs {
+ int (*mqd_create)(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args,
+ struct amdgpu_usermode_queue *queue);
+ void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *uq);
+ int (*unmap)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+ int (*map)(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue);
+};
+
+/* Usermode queues for gfx */
+struct amdgpu_userq_mgr {
+ struct idr userq_idr;
+ struct mutex userq_mutex;
+ struct amdgpu_device *adev;
+ struct delayed_work resume_work;
+ struct list_head list;
+ struct drm_file *file;
+};
+
+struct amdgpu_db_info {
+ uint64_t doorbell_handle;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ struct amdgpu_userq_obj *db_obj;
+};
+
+int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
+
+int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
+ struct amdgpu_device *adev);
+
+void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);
+
+int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj,
+ int size);
+
+void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_userq_obj *userq_obj);
+
+void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_eviction_fence *ev_fence);
+
+int amdgpu_userq_active(struct amdgpu_userq_mgr *uq_mgr);
+
+void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+ struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_db_info *db_info,
+ struct drm_file *filp);
+
+u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
+
+int amdgpu_userq_suspend(struct amdgpu_device *adev);
+int amdgpu_userq_resume(struct amdgpu_device *adev);
+
+int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
+ u32 idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..a86616c6deef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,968 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <linux/dma-fence-unwrap.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+int amdgpu_userq_fence_slab_init(void)
+{
+ amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
+ sizeof(struct amdgpu_userq_fence),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!amdgpu_userq_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void amdgpu_userq_fence_slab_fini(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+ if (!f || f->ops != &amdgpu_userq_fence_ops)
+ return NULL;
+
+ return container_of(f, struct amdgpu_userq_fence, base);
+}
+
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long flags;
+ int r;
+
+ fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
+ if (!fence_drv)
+ return -ENOMEM;
+
+ /* Acquire seq64 memory */
+ r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
+ &fence_drv->cpu_addr);
+ if (r)
+ goto free_fence_drv;
+
+ memset(fence_drv->cpu_addr, 0, sizeof(u64));
+
+ kref_init(&fence_drv->refcount);
+ INIT_LIST_HEAD(&fence_drv->fences);
+ spin_lock_init(&fence_drv->fence_list_lock);
+
+ fence_drv->adev = adev;
+ fence_drv->context = dma_fence_context_alloc(1);
+ get_task_comm(fence_drv->timeline_name, current);
+
+ xa_lock_irqsave(&adev->userq_xa, flags);
+ r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
+ fence_drv, GFP_KERNEL));
+ xa_unlock_irqrestore(&adev->userq_xa, flags);
+ if (r)
+ goto free_seq64;
+
+ userq->fence_drv = fence_drv;
+
+ return 0;
+
+free_seq64:
+ amdgpu_seq64_free(adev, fence_drv->va);
+free_fence_drv:
+ kfree(fence_drv);
+
+ return r;
+}
+
+static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ unsigned long index;
+
+ if (xa_empty(xa))
+ return;
+
+ xa_lock(xa);
+ xa_for_each(xa, index, fence_drv) {
+ __xa_erase(xa, index);
+ amdgpu_userq_fence_driver_put(fence_drv);
+ }
+
+ xa_unlock(xa);
+}
+
+void
+amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
+{
+ amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
+ xa_destroy(&userq->fence_drv_xa);
+ /* Drop the fence_drv reference held by user queue */
+ amdgpu_userq_fence_driver_put(userq->fence_drv);
+}
+
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct dma_fence *fence;
+ u64 rptr;
+ int i;
+
+ if (!fence_drv)
+ return;
+
+ rptr = amdgpu_userq_fence_read(fence_drv);
+
+ spin_lock(&fence_drv->fence_list_lock);
+ list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+ fence = &userq_fence->base;
+
+ if (rptr < fence->seqno)
+ break;
+
+ dma_fence_signal(fence);
+
+ for (i = 0; i < userq_fence->fence_drv_array_count; i++)
+ amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
+
+ list_del(&userq_fence->link);
+ dma_fence_put(fence);
+ }
+ spin_unlock(&fence_drv->fence_list_lock);
+}
+
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+ struct amdgpu_userq_fence_driver,
+ refcount);
+ struct amdgpu_userq_fence_driver *xa_fence_drv;
+ struct amdgpu_device *adev = fence_drv->adev;
+ struct amdgpu_userq_fence *fence, *tmp;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long index, flags;
+ struct dma_fence *f;
+
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+ f = &fence->base;
+
+ if (!dma_fence_is_signaled(f)) {
+ dma_fence_set_error(f, -ECANCELED);
+ dma_fence_signal(f);
+ }
+
+ list_del(&fence->link);
+ dma_fence_put(f);
+ }
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ xa_lock_irqsave(xa, flags);
+ xa_for_each(xa, index, xa_fence_drv)
+ if (xa_fence_drv == fence_drv)
+ __xa_erase(xa, index);
+ xa_unlock_irqrestore(xa, flags);
+
+ /* Free seq64 memory */
+ amdgpu_seq64_free(adev, fence_drv->va);
+ kfree(fence_drv);
+}
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+ *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+ return *userq_fence ? 0 : -ENOMEM;
+}
+
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ struct amdgpu_userq_fence *userq_fence,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct dma_fence *fence;
+ unsigned long flags;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ dma_fence_get(fence);
+
+ if (!xa_empty(&userq->fence_drv_xa)) {
+ struct amdgpu_userq_fence_driver *stored_fence_drv;
+ unsigned long index, count = 0;
+ int i = 0;
+
+ xa_lock(&userq->fence_drv_xa);
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
+ count++;
+
+ userq_fence->fence_drv_array =
+ kvmalloc_array(count,
+ sizeof(struct amdgpu_userq_fence_driver *),
+ GFP_ATOMIC);
+
+ if (userq_fence->fence_drv_array) {
+ xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
+ userq_fence->fence_drv_array[i] = stored_fence_drv;
+ __xa_erase(&userq->fence_drv_xa, index);
+ i++;
+ }
+ }
+
+ userq_fence->fence_drv_array_count = i;
+ xa_unlock(&userq->fence_drv_xa);
+ } else {
+ userq_fence->fence_drv_array = NULL;
+ userq_fence->fence_drv_array_count = 0;
+ }
+
+ /* Check if hardware has already processed the job */
+ spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
+ if (!dma_fence_is_signaled_locked(fence))
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ else
+ dma_fence_put(fence);
+
+ spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
+
+ *f = fence;
+
+ return 0;
+}
+
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userq_fence";
+}
+
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 rptr, wptr;
+
+ rptr = amdgpu_userq_fence_read(fence_drv);
+ wptr = fence->base.seqno;
+
+ if (rptr >= wptr)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
+ struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
+
+ /* Release the fence driver reference */
+ amdgpu_userq_fence_driver_put(fence_drv);
+
+ kvfree(userq_fence->fence_drv_array);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+}
+
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .use_64bit_seqno = true,
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
+
+/**
+ * amdgpu_userq_fence_read_wptr - Read the userq wptr value
+ *
+ * @queue: user mode queue structure pointer
+ * @wptr: write pointer value
+ *
+ * Read the wptr value from userq's MQD. The userq signal IOCTL
+ * creates a dma_fence for the shared buffers that expects the
+ * RPTR value written to seq64 memory >= WPTR.
+ *
+ * Returns wptr value on success, error on failure.
+ */
+static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
+ u64 *wptr)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_bo *bo;
+ u64 addr, *ptr;
+ int r;
+
+ r = amdgpu_bo_reserve(queue->vm->root.bo, false);
+ if (r)
+ return r;
+
+ addr = queue->userq_prop->wptr_gpu_addr;
+ addr &= AMDGPU_GMC_HOLE_MASK;
+
+ mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
+ if (!mapping) {
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
+ return -EINVAL;
+ }
+
+ bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
+ amdgpu_bo_unreserve(queue->vm->root.bo);
+ r = amdgpu_bo_reserve(bo, true);
+ if (r) {
+ DRM_ERROR("Failed to reserve userqueue wptr bo");
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, (void **)&ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the userqueue wptr bo");
+ goto map_error;
+ }
+
+ *wptr = le64_to_cpu(*ptr);
+
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return 0;
+
+map_error:
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
+
+ return r;
+}
+
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+ dma_fence_put(fence);
+}
+
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct drm_amdgpu_userq_signal *args = data;
+ struct drm_gem_object **gobj_write = NULL;
+ struct drm_gem_object **gobj_read = NULL;
+ struct amdgpu_usermode_queue *queue;
+ struct amdgpu_userq_fence *userq_fence;
+ struct drm_syncobj **syncobj = NULL;
+ u32 *bo_handles_write, num_write_bo_handles;
+ u32 *syncobj_handles, num_syncobj_handles;
+ u32 *bo_handles_read, num_read_bo_handles;
+ int r, i, entry, rentry, wentry;
+ struct dma_fence *fence;
+ struct drm_exec exec;
+ u64 wptr;
+
+ num_syncobj_handles = args->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj_handles));
+ if (IS_ERR(syncobj_handles))
+ return PTR_ERR(syncobj_handles);
+
+ /* Array of pointers to the looked up syncobjs */
+ syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
+ if (!syncobj) {
+ r = -ENOMEM;
+ goto free_syncobj_handles;
+ }
+
+ for (entry = 0; entry < num_syncobj_handles; entry++) {
+ syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
+ if (!syncobj[entry]) {
+ r = -ENOENT;
+ goto free_syncobj;
+ }
+ }
+
+ num_read_bo_handles = args->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
+ sizeof(u32) * num_read_bo_handles);
+ if (IS_ERR(bo_handles_read)) {
+ r = PTR_ERR(bo_handles_read);
+ goto free_syncobj;
+ }
+
+ /* Array of pointers to the GEM read objects */
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_bo_handles_read;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ num_write_bo_handles = args->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
+ sizeof(u32) * num_write_bo_handles);
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto put_gobj_read;
+ }
+
+ /* Array of pointers to the GEM write objects */
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto free_bo_handles_write;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ /* Retrieve the user queue */
+ queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+ if (!queue) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+
+ r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+ if (r)
+ goto put_gobj_write;
+
+ r = amdgpu_userq_fence_alloc(&userq_fence);
+ if (r)
+ goto put_gobj_write;
+
+ /* We are here means UQ is active, make sure the eviction fence is valid */
+ amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+ /* Create a new fence */
+ r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+ if (r) {
+ mutex_unlock(&userq_mgr->userq_mutex);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+ goto put_gobj_write;
+ }
+
+ dma_fence_put(queue->last_fence);
+ queue->last_fence = dma_fence_get(fence);
+ mutex_unlock(&userq_mgr->userq_mutex);
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ amdgpu_userq_fence_cleanup(fence);
+ goto exec_fini;
+ }
+ }
+
+ for (i = 0; i < num_read_bo_handles; i++) {
+ if (!gobj_read || !gobj_read[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_read[i]->resv, fence,
+ DMA_RESV_USAGE_READ);
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ if (!gobj_write || !gobj_write[i]->resv)
+ continue;
+
+ dma_resv_add_fence(gobj_write[i]->resv, fence,
+ DMA_RESV_USAGE_WRITE);
+ }
+
+ /* Add the created fence to syncobj/BO's */
+ for (i = 0; i < num_syncobj_handles; i++)
+ drm_syncobj_replace_fence(syncobj[i], fence);
+
+ /* drop the reference acquired in fence creation function */
+ dma_fence_put(fence);
+
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+free_syncobj:
+ while (entry-- > 0)
+ if (syncobj[entry])
+ drm_syncobj_put(syncobj[entry]);
+ kfree(syncobj);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+
+ return r;
+}
+
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
+ u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
+ struct drm_amdgpu_userq_fence_info *fence_info = NULL;
+ struct drm_amdgpu_userq_wait *wait_info = data;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
+ struct amdgpu_usermode_queue *waitq;
+ struct drm_gem_object **gobj_write;
+ struct drm_gem_object **gobj_read;
+ struct dma_fence **fences = NULL;
+ u16 num_points, num_fences = 0;
+ int r, i, rentry, wentry, cnt;
+ struct drm_exec exec;
+
+ num_read_bo_handles = wait_info->num_bo_read_handles;
+ bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
+ size_mul(sizeof(u32), num_read_bo_handles));
+ if (IS_ERR(bo_handles_read))
+ return PTR_ERR(bo_handles_read);
+
+ num_write_bo_handles = wait_info->num_bo_write_handles;
+ bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
+ size_mul(sizeof(u32), num_write_bo_handles));
+ if (IS_ERR(bo_handles_write)) {
+ r = PTR_ERR(bo_handles_write);
+ goto free_bo_handles_read;
+ }
+
+ num_syncobj = wait_info->num_syncobj_handles;
+ syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
+ size_mul(sizeof(u32), num_syncobj));
+ if (IS_ERR(syncobj_handles)) {
+ r = PTR_ERR(syncobj_handles);
+ goto free_bo_handles_write;
+ }
+
+ num_points = wait_info->num_syncobj_timeline_handles;
+ timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_handles)) {
+ r = PTR_ERR(timeline_handles);
+ goto free_syncobj_handles;
+ }
+
+ timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
+ sizeof(u32) * num_points);
+ if (IS_ERR(timeline_points)) {
+ r = PTR_ERR(timeline_points);
+ goto free_timeline_handles;
+ }
+
+ gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
+ if (!gobj_read) {
+ r = -ENOMEM;
+ goto free_timeline_points;
+ }
+
+ for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
+ gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
+ if (!gobj_read[rentry]) {
+ r = -ENOENT;
+ goto put_gobj_read;
+ }
+ }
+
+ gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
+ if (!gobj_write) {
+ r = -ENOMEM;
+ goto put_gobj_read;
+ }
+
+ for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
+ gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
+ if (!gobj_write[wentry]) {
+ r = -ENOENT;
+ goto put_gobj_write;
+ }
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
+ (num_read_bo_handles + num_write_bo_handles));
+
+ /* Lock all BOs with retry handling */
+ drm_exec_until_all_locked(&exec) {
+ r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+
+ r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
+ drm_exec_retry_on_contention(&exec);
+ if (r) {
+ drm_exec_fini(&exec);
+ goto put_gobj_write;
+ }
+ }
+
+ if (!wait_info->num_fences) {
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ dma_fence_unwrap_for_each(f, &iter, fence)
+ num_fences++;
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Count syncobj's fence */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto exec_fini;
+
+ num_fences++;
+ dma_fence_put(fence);
+ }
+
+ /* Count GEM objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence)
+ num_fences++;
+ }
+
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence)
+ num_fences++;
+ }
+
+ /*
+ * Passing num_fences = 0 means that userspace doesn't want to
+ * retrieve userq_fence_info. If num_fences = 0 we skip filling
+ * userq_fence_info and return the actual number of fences on
+ * args->num_fences.
+ */
+ wait_info->num_fences = num_fences;
+ } else {
+ /* Array of fence info */
+ fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
+ if (!fence_info) {
+ r = -ENOMEM;
+ goto exec_fini;
+ }
+
+ /* Array of fences */
+ fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ r = -ENOMEM;
+ goto free_fence_info;
+ }
+
+ /* Retrieve GEM read objects fence */
+ for (i = 0; i < num_read_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
+ DMA_RESV_USAGE_READ, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ /* Retrieve GEM write objects fence */
+ for (i = 0; i < num_write_bo_handles; i++) {
+ struct dma_resv_iter resv_cursor;
+ struct dma_fence *fence;
+
+ dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
+ DMA_RESV_USAGE_WRITE, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ dma_fence_get(fence);
+ }
+ }
+
+ if (num_points) {
+ struct dma_fence_unwrap iter;
+ struct dma_fence *fence;
+ struct dma_fence *f;
+
+ for (i = 0; i < num_points; i++) {
+ r = drm_syncobj_find_fence(filp, timeline_handles[i],
+ timeline_points[i],
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ dma_fence_unwrap_for_each(f, &iter, fence) {
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ dma_fence_get(f);
+ fences[num_fences++] = f;
+ }
+
+ dma_fence_put(fence);
+ }
+ }
+
+ /* Retrieve syncobj's fence */
+ for (i = 0; i < num_syncobj; i++) {
+ struct dma_fence *fence;
+
+ r = drm_syncobj_find_fence(filp, syncobj_handles[i],
+ 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
+ &fence);
+ if (r)
+ goto free_fences;
+
+ if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ fences[num_fences++] = fence;
+ }
+
+ /*
+ * Keep only the latest fences to reduce the number of values
+ * given back to userspace.
+ */
+ num_fences = dma_fence_dedup_array(fences, num_fences);
+
+ waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
+ if (!waitq) {
+ r = -EINVAL;
+ goto free_fences;
+ }
+
+ for (i = 0, cnt = 0; i < num_fences; i++) {
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ u32 index;
+
+ userq_fence = to_amdgpu_userq_fence(fences[i]);
+ if (!userq_fence) {
+ /*
+ * Just waiting on other driver fences should
+ * be good for now
+ */
+ r = dma_fence_wait(fences[i], true);
+ if (r) {
+ dma_fence_put(fences[i]);
+ goto free_fences;
+ }
+
+ dma_fence_put(fences[i]);
+ continue;
+ }
+
+ fence_drv = userq_fence->fence_drv;
+ /*
+ * We need to make sure the user queue release their reference
+ * to the fence drivers at some point before queue destruction.
+ * Otherwise, we would gather those references until we don't
+ * have any more space left and crash.
+ */
+ r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
+ xa_limit_32b, GFP_KERNEL);
+ if (r)
+ goto free_fences;
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+
+ /* Store drm syncobj's gpu va address and value */
+ fence_info[cnt].va = fence_drv->va;
+ fence_info[cnt].value = fences[i]->seqno;
+
+ dma_fence_put(fences[i]);
+ /* Increment the actual userq fence count */
+ cnt++;
+ }
+
+ wait_info->num_fences = cnt;
+ /* Copy userq fence info to user space */
+ if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
+ fence_info, wait_info->num_fences * sizeof(*fence_info))) {
+ r = -EFAULT;
+ goto free_fences;
+ }
+
+ kfree(fences);
+ kfree(fence_info);
+ }
+
+ drm_exec_fini(&exec);
+ for (i = 0; i < num_read_bo_handles; i++)
+ drm_gem_object_put(gobj_read[i]);
+ kfree(gobj_read);
+
+ for (i = 0; i < num_write_bo_handles; i++)
+ drm_gem_object_put(gobj_write[i]);
+ kfree(gobj_write);
+
+ kfree(timeline_points);
+ kfree(timeline_handles);
+ kfree(syncobj_handles);
+ kfree(bo_handles_write);
+ kfree(bo_handles_read);
+
+ return 0;
+
+free_fences:
+ while (num_fences-- > 0)
+ dma_fence_put(fences[num_fences]);
+ kfree(fences);
+free_fence_info:
+ kfree(fence_info);
+exec_fini:
+ drm_exec_fini(&exec);
+put_gobj_write:
+ while (wentry-- > 0)
+ drm_gem_object_put(gobj_write[wentry]);
+ kfree(gobj_write);
+put_gobj_read:
+ while (rentry-- > 0)
+ drm_gem_object_put(gobj_read[rentry]);
+ kfree(gobj_read);
+free_timeline_points:
+ kfree(timeline_points);
+free_timeline_handles:
+ kfree(timeline_handles);
+free_syncobj_handles:
+ kfree(syncobj_handles);
+free_bo_handles_write:
+ kfree(bo_handles_write);
+free_bo_handles_read:
+ kfree(bo_handles_read);
+
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..97a125ab8a78
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userq.h"
+
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /*
+ * This lock is necessary to synchronize the
+ * userqueue dma fence operations.
+ */
+ spinlock_t lock;
+ struct list_head link;
+ unsigned long fence_drv_array_count;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence_driver **fence_drv_array;
+};
+
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ u64 va;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /*
+ * This lock is necesaary to synchronize the access
+ * to the fences list by the fence driver.
+ */
+ spinlock_t fence_list_lock;
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 31fd30dcd593..74758b5ffc6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n",
fw_name);
@@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo)
for (i = 0; i < abo->placement.num_placement; ++i) {
abo->placements[i].fpfn = 0 >> PAGE_SHIFT;
abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+ if (abo->placements[i].mem_type == TTM_PL_VRAM)
+ abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 74fdbf71d95b..b9060bcd4806 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
return -EINVAL;
}
- r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name);
+ r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name);
if (r) {
dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
fw_name);
@@ -214,15 +214,15 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
drm_sched_entity_destroy(&adev->vce.entity);
- amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
- (void **)&adev->vce.cpu_addr);
-
for (i = 0; i < adev->vce.num_rings; i++)
amdgpu_ring_fini(&adev->vce.ring[i]);
amdgpu_ucode_release(&adev->vce.fw);
mutex_destroy(&adev->vce.idle_mutex);
+ amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
+ (void **)&adev->vce.cpu_addr);
+
return 0;
}
@@ -503,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
r = amdgpu_job_submit_direct(job, ring, &f);
- amdgpu_ib_free(ring->adev, &ib_msg, f);
+ amdgpu_ib_free(&ib_msg, f);
if (r)
goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 43f44cc201cb..c8885c3d54b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -62,6 +62,7 @@
#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"
+#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
@@ -88,46 +89,57 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN5_0_1);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
-int amdgpu_vcn_early_init(struct amdgpu_device *adev)
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i)
{
char ucode_prefix[25];
- int r, i;
+ int r;
+ adev->vcn.inst[i].adev = adev;
+ adev->vcn.inst[i].inst = i;
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
- r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i);
- else
- r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix);
- if (r) {
- amdgpu_ucode_release(&adev->vcn.fw[i]);
- return r;
+
+ if (i != 0 && adev->vcn.per_inst_fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_%d.bin", ucode_prefix, i);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ if (!adev->vcn.inst[0].fw) {
+ r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", ucode_prefix);
+ if (r)
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ } else {
+ r = 0;
}
+ adev->vcn.inst[i].fw = adev->vcn.inst[0].fw;
}
+
return r;
}
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i)
{
unsigned long bo_size;
const struct common_firmware_header *hdr;
unsigned char fw_check;
unsigned int fw_shared_size, log_offset;
- int i, r;
-
- INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
- mutex_init(&adev->vcn.vcn_pg_lock);
- mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
- atomic_set(&adev->vcn.total_submission_cnt, 0);
- for (i = 0; i < adev->vcn.num_vcn_inst; i++)
- atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
+ int r;
+ mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
+ mutex_init(&adev->vcn.inst[i].vcn_pg_lock);
+ atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0);
+ INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler);
+ atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
- adev->vcn.indirect_sram = true;
+ adev->vcn.inst[i].indirect_sram = true;
/*
* Some Steam Deck's BIOS versions are incompatible with the
@@ -140,18 +152,19 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
- !strncmp("F7A0114", bios_ver, 7))) {
- adev->vcn.indirect_sram = false;
+ !strncmp("F7A0114", bios_ver, 7))) {
+ adev->vcn.inst[i].indirect_sram = false;
dev_info(adev->dev,
- "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+ "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
}
}
/* from vcn4 and above, only unified queue is used */
- adev->vcn.using_unified_queue =
+ adev->vcn.inst[i].using_unified_queue =
amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0);
- hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version);
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
/* Bit 20-23, it is encode major and non-zero for new naming convention.
@@ -169,16 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
enc_major = fw_check;
dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
- DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
- enc_major, enc_minor, dec_ver, vep, fw_rev);
+ dev_info(adev->dev,
+ "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
+ enc_major, enc_minor, dec_ver, vep, fw_rev);
} else {
unsigned int version_major, version_minor, family_id;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
- version_major, version_minor, family_id);
+ dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n",
+ version_major, version_minor, family_id);
}
bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
@@ -201,80 +215,77 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (amdgpu_vcnfw_log)
bo_size += AMDGPU_VCNFW_LOG_SIZE;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ &adev->vcn.inst[i].cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ return r;
+ }
+
+ adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
+ bo_size - fw_shared_size;
+ adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
+ bo_size - fw_shared_size;
+
+ adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
+
+ if (amdgpu_vcnfw_log) {
+ adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
+ adev->vcn.inst[i].fw_shared.log_offset = log_offset;
+ }
- r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+ if (adev->vcn.inst[i].indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].vcpu_bo,
- &adev->vcn.inst[i].gpu_addr,
- &adev->vcn.inst[i].cpu_addr);
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ &adev->vcn.inst[i].dpg_sram_cpu_addr);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+ dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
return r;
}
-
- adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr +
- bo_size - fw_shared_size;
- adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr +
- bo_size - fw_shared_size;
-
- adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size;
-
- if (amdgpu_vcnfw_log) {
- adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE;
- adev->vcn.inst[i].fw_shared.log_offset = log_offset;
- }
-
- if (adev->vcn.indirect_sram) {
- r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT,
- &adev->vcn.inst[i].dpg_sram_bo,
- &adev->vcn.inst[i].dpg_sram_gpu_addr,
- &adev->vcn.inst[i].dpg_sram_cpu_addr);
- if (r) {
- dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
- return r;
- }
- }
}
return 0;
}
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i)
{
- int i, j;
+ int j;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- amdgpu_bo_free_kernel(
- &adev->vcn.inst[j].dpg_sram_bo,
- &adev->vcn.inst[j].dpg_sram_gpu_addr,
- (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
+ amdgpu_bo_free_kernel(
+ &adev->vcn.inst[i].dpg_sram_bo,
+ &adev->vcn.inst[i].dpg_sram_gpu_addr,
+ (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr);
- kvfree(adev->vcn.inst[j].saved_bo);
+ kvfree(adev->vcn.inst[i].saved_bo);
- amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
- &adev->vcn.inst[j].gpu_addr,
- (void **)&adev->vcn.inst[j].cpu_addr);
+ amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo,
+ &adev->vcn.inst[i].gpu_addr,
+ (void **)&adev->vcn.inst[i].cpu_addr);
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]);
- amdgpu_ucode_release(&adev->vcn.fw[j]);
+ if (adev->vcn.per_inst_fw) {
+ amdgpu_ucode_release(&adev->vcn.inst[i].fw);
+ } else {
+ amdgpu_ucode_release(&adev->vcn.inst[0].fw);
+ adev->vcn.inst[i].fw = NULL;
}
-
- mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
- mutex_destroy(&adev->vcn.vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock);
+ mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround);
return 0;
}
@@ -282,7 +293,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
- int vcn_config = adev->vcn.vcn_config[vcn_instance];
+ int vcn_config = adev->vcn.inst[vcn_instance].vcn_config;
if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK))
ret = true;
@@ -294,173 +305,208 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t
return ret;
}
-int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- bool in_ras_intr = amdgpu_ras_intr_triggered();
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return 0;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- /* err_event_athub will corrupt VCPU buffer, so we need to
- * restore fw data and clear buffer in amdgpu_vcn_resume() */
- if (in_ras_intr)
- return 0;
+ adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
+ if (!adev->vcn.inst[i].saved_bo)
+ return -ENOMEM;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return 0;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+ drm_dev_exit(idx);
+ }
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ return 0;
+}
- adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
- if (!adev->vcn.inst[i].saved_bo)
- return -ENOMEM;
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev)
+{
+ int ret, i;
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
- drm_dev_exit(idx);
- }
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+ if (ret)
+ return ret;
}
+
return 0;
}
-int amdgpu_vcn_resume(struct amdgpu_device *adev)
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i)
+{
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
+
+ /* err_event_athub and dpc recovery will corrupt VCPU buffer, so we need to
+ * restore fw data and clear buffer in amdgpu_vcn_resume() */
+ if (in_ras_intr || adev->pcie_reset_ctx.in_link_reset)
+ return 0;
+
+ return amdgpu_vcn_save_vcpu_bo_inst(adev, i);
+}
+
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i)
{
unsigned int size;
void *ptr;
- int i, idx;
+ int idx;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->vcn.inst[i].vcpu_bo == NULL)
- return -EINVAL;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
+ if (adev->vcn.inst[i].vcpu_bo == NULL)
+ return -EINVAL;
+
+ size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
+ ptr = adev->vcn.inst[i].cpu_addr;
- size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
- ptr = adev->vcn.inst[i].cpu_addr;
+ if (adev->vcn.inst[i].saved_bo != NULL) {
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ drm_dev_exit(idx);
+ }
+ kvfree(adev->vcn.inst[i].saved_bo);
+ adev->vcn.inst[i].saved_bo = NULL;
+ } else {
+ const struct common_firmware_header *hdr;
+ unsigned int offset;
- if (adev->vcn.inst[i].saved_bo != NULL) {
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
+ memcpy_toio(adev->vcn.inst[i].cpu_addr,
+ adev->vcn.inst[i].fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
- kvfree(adev->vcn.inst[i].saved_bo);
- adev->vcn.inst[i].saved_bo = NULL;
- } else {
- const struct common_firmware_header *hdr;
- unsigned int offset;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- if (drm_dev_enter(adev_to_drm(adev), &idx)) {
- memcpy_toio(adev->vcn.inst[i].cpu_addr,
- adev->vcn.fw[i]->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- drm_dev_exit(idx);
- }
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
- }
- memset_io(ptr, 0, size);
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
}
+ memset_io(ptr, 0, size);
}
+
return 0;
}
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
- unsigned int i, j;
+ unsigned int i = vcn_inst->inst, j;
int r = 0;
- for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
- if (adev->vcn.harvest_config & (1 << j))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]);
- /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.using_unified_queue) {
- struct dpg_pause_state new_state;
-
- if (fence[j] ||
- unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
- new_state.fw_based = VCN_DPG_STATE__PAUSE;
- else
- new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !adev->vcn.inst[i].using_unified_queue) {
+ struct dpg_pause_state new_state;
- adev->vcn.pause_dpg_mode(adev, j, &new_state);
- }
+ if (fence[i] ||
+ unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
- fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
- fences += fence[j];
+ adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state);
}
- if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_GATE);
- r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- false);
- if (r)
- dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec);
+ fences += fence[i];
+
+ if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (adev->vcn.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ adev->vcn.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
}
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me];
int r = 0;
- atomic_inc(&adev->vcn.total_submission_cnt);
+ atomic_inc(&vcn_inst->total_submission_cnt);
+
+ cancel_delayed_work_sync(&vcn_inst->idle_work);
- if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->vcn.workload_profile_active)
+ goto pg_lock;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
- true);
+ true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
+ adev->vcn.workload_profile_active = true;
}
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
- mutex_lock(&adev->vcn.vcn_pg_lock);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
- AMD_PG_STATE_UNGATE);
+pg_lock:
+ mutex_lock(&vcn_inst->vcn_pg_lock);
+ vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
- !adev->vcn.using_unified_queue) {
+ !vcn_inst->using_unified_queue) {
struct dpg_pause_state new_state;
if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
- atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+ atomic_inc(&vcn_inst->dpg_enc_submission_cnt);
new_state.fw_based = VCN_DPG_STATE__PAUSE;
} else {
unsigned int fences = 0;
unsigned int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
- fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+ for (i = 0; i < vcn_inst->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]);
- if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+ if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt))
new_state.fw_based = VCN_DPG_STATE__PAUSE;
else
new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
}
- adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
+ vcn_inst->pause_dpg_mode(vcn_inst, &new_state);
}
- mutex_unlock(&adev->vcn.vcn_pg_lock);
+ mutex_unlock(&vcn_inst->vcn_pg_lock);
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@@ -470,12 +516,13 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
/* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
- !adev->vcn.using_unified_queue)
+ !adev->vcn.inst[ring->me].using_unified_queue)
atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
- atomic_dec(&ring->adev->vcn.total_submission_cnt);
+ atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt);
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work,
+ VCN_IDLE_TIMEOUT);
}
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
@@ -493,7 +540,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -558,14 +605,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
goto err;
ib = &job->ibs[0];
- ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
+ ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
+ ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
+ ib->ptr[4] = PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
+ ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -574,7 +621,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -585,7 +632,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -728,7 +775,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
uint32_t ib_pack_in_dw;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -741,7 +788,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib->length_dw = 0;
/* single queue headers */
- if (adev->vcn.using_unified_queue) {
+ if (adev->vcn.inst[ring->me].using_unified_queue) {
ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ 4 + 2; /* engine info + decoding ib in dw */
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
@@ -760,14 +807,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
if (fence)
*fence = dma_fence_get(f);
@@ -778,7 +825,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
err_free:
amdgpu_job_free(job);
err:
- amdgpu_ib_free(adev, ib_msg, f);
+ amdgpu_ib_free(ib_msg, f);
return r;
}
@@ -858,7 +905,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
uint64_t addr;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -872,7 +919,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
ib->length_dw = 0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -894,7 +941,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -925,7 +972,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
uint64_t addr;
int i, r;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_size_dw += 8;
r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
@@ -939,7 +986,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
ib->length_dw = 0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
ib->ptr[ib->length_dw++] = 0x00000018;
@@ -961,7 +1008,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (adev->vcn.using_unified_queue)
+ if (adev->vcn.inst[ring->me].using_unified_queue)
amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
r = amdgpu_job_submit_direct(job, ring, &f);
@@ -1008,7 +1055,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = 0;
error:
- amdgpu_ib_free(adev, &ib, fence);
+ amdgpu_ib_free(&ib, fence);
dma_fence_put(fence);
return r;
@@ -1019,7 +1066,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
long r;
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) {
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) &&
+ (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) {
r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
if (r)
goto error;
@@ -1045,34 +1093,32 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
}
}
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i)
{
- int i;
unsigned int idx;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
- /* currently only support 2 FW instances */
- if (i >= 2) {
- dev_info(adev->dev, "More then 2 VCN FW instances!\n");
- break;
- }
- idx = AMDGPU_UCODE_ID_VCN + i;
- adev->firmware.ucode[idx].ucode_id = idx;
- adev->firmware.ucode[idx].fw = adev->vcn.fw[i];
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
-
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
- IP_VERSION(4, 0, 3))
- break;
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+
+ if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) ||
+ amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1))
+ && (i > 0))
+ return;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data;
+ /* currently only support 2 FW instances */
+ if (i >= 2) {
+ dev_info(adev->dev, "More then 2 VCN FW instances!\n");
+ return;
}
+ idx = AMDGPU_UCODE_ID_VCN + i;
+ adev->firmware.ucode[idx].ucode_id = idx;
+ adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
}
}
@@ -1277,3 +1323,131 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
return psp_execute_ip_fw_load(&adev->psp, &ucode);
}
+
+static ssize_t amdgpu_get_vcn_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vcn.supported_reset);
+}
+
+static DEVICE_ATTR(vcn_reset_mask, 0444,
+ amdgpu_get_vcn_reset_mask, NULL);
+
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vcn.num_vcn_inst) {
+ r = device_create_file(adev->dev, &dev_attr_vcn_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vcn.num_vcn_inst)
+ device_remove_file(adev->dev, &dev_attr_vcn_reset_mask);
+ }
+}
+
+/*
+ * debugfs to enable/disable vcn job submission to specific core or
+ * instance. It is created only if the queue type is unified.
+ */
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+
+ mask = (1ULL << adev->vcn.num_vcn_inst) - 1;
+ if ((val & mask) == 0)
+ return -EINVAL;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (val & (1ULL << i))
+ ring->sched.ready = true;
+ else
+ ring->sched.ready = false;
+ }
+ /* publish sched.ready flag update effective immediately across smp */
+ smp_rmb();
+ return 0;
+}
+
+static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ u32 i;
+ u64 mask = 0;
+ struct amdgpu_ring *ring;
+
+ if (!adev)
+ return -ENODEV;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ if (ring->sched.ready)
+ mask |= 1ULL << i;
+ }
+ *val = mask;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops,
+ amdgpu_debugfs_vcn_sched_mask_get,
+ amdgpu_debugfs_vcn_sched_mask_set, "%llx\n");
+#endif
+
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[32];
+
+ if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue)
+ return;
+ sprintf(name, "amdgpu_vcn_sched_mask");
+ debugfs_create_file(name, 0600, root, adev,
+ &amdgpu_debugfs_vcn_sched_mask_fops);
+#endif
+}
+
+/**
+ * vcn_set_powergating_state - set VCN block powergating state
+ *
+ * @ip_block: amdgpu_ip_block pointer
+ * @state: power gating state
+ *
+ * Set VCN block powergating state
+ */
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ ret |= vinst->set_pg_state(vinst, state);
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 2a1f3dbb14d3..83adf81defc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2016 Advanced Micro Devices, Inc.
+ * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -66,7 +66,6 @@
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
-#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_VID_IP_ADDRESS_2_0 0x0
#define VCN_AON_IP_ADDRESS_2_0 0x30000
@@ -163,20 +162,30 @@
#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \
({ \
uint32_t internal_reg_offset, addr; \
- bool video_range, aon_range; \
+ bool video_range, video1_range, aon_range, aon1_range; \
\
addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \
addr <<= 2; \
video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \
((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \
+ video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \
aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \
((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \
+ aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \
+ ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \
if (video_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \
(VCN_VID_IP_ADDRESS)); \
else if (aon_range) \
internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \
(VCN_AON_IP_ADDRESS)); \
+ else if (video1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \
+ (VCN_VID_IP_ADDRESS)); \
+ else if (aon1_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \
+ (VCN_AON_IP_ADDRESS)); \
else \
internal_reg_offset = (0xFFFFF & addr); \
\
@@ -228,6 +237,12 @@
#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2
+enum amdgpu_vcn_caps {
+ AMDGPU_VCN_RRMT_ENABLED,
+};
+
+#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -279,6 +294,8 @@ struct amdgpu_vcn_fw_shared {
};
struct amdgpu_vcn_inst {
+ struct amdgpu_device *adev;
+ int inst;
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
@@ -297,6 +314,23 @@ struct amdgpu_vcn_inst {
atomic_t dpg_enc_submission_cnt;
struct amdgpu_vcn_fw_shared fw_shared;
uint8_t aid_id;
+ const struct firmware *fw; /* VCN firmware */
+ uint8_t vcn_config;
+ uint32_t vcn_codec_disable_mask;
+ atomic_t total_submission_cnt;
+ struct mutex vcn_pg_lock;
+ enum amd_powergating_state cur_state;
+ struct delayed_work idle_work;
+ unsigned fw_version;
+ unsigned num_enc_rings;
+ bool indirect_sram;
+ struct amdgpu_vcn_reg internal;
+ struct mutex vcn1_jpeg1_workaround;
+ int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
+ int (*set_pg_state)(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+ bool using_unified_queue;
};
struct amdgpu_vcn_ras {
@@ -304,35 +338,28 @@ struct amdgpu_vcn_ras {
};
struct amdgpu_vcn {
- unsigned fw_version;
- struct delayed_work idle_work;
- const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */
- unsigned num_enc_rings;
- enum amd_powergating_state cur_state;
- bool indirect_sram;
-
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
- uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
- uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
- struct amdgpu_vcn_reg internal;
- struct mutex vcn_pg_lock;
- struct mutex vcn1_jpeg1_workaround;
- atomic_t total_submission_cnt;
unsigned harvest_config;
- int (*pause_dpg_mode)(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
struct ras_common_if *ras_if;
struct amdgpu_vcn_ras *ras;
uint16_t inst_mask;
uint8_t num_inst_per_aid;
- bool using_unified_queue;
/* IP reg dump */
uint32_t *ip_dump;
+
+ uint32_t supported_reset;
+ uint32_t caps;
+
+ bool per_inst_fw;
+ unsigned fw_version;
+
+ bool workload_profile_active;
+ struct mutex workload_profile_mutex;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
@@ -481,11 +508,11 @@ enum vcn_ring_type {
VCN_UNIFIED_RING,
};
-int amdgpu_vcn_early_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
-int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
-int amdgpu_vcn_suspend(struct amdgpu_device *adev);
-int amdgpu_vcn_resume(struct amdgpu_device *adev);
+int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i);
+int amdgpu_vcn_resume(struct amdgpu_device *adev, int i);
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
@@ -503,7 +530,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring);
-void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev);
+void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i);
void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn);
void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
@@ -518,5 +545,12 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx,
enum AMDGPU_UCODE_ID ucode_id);
+int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev);
+int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev);
+void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev);
+
+int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b6397d3229e1..13f0cdeb59c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -523,6 +523,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->unique_id =
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
+ adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
+ adev->virt.ras_telemetry_en_caps.all =
+ ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
break;
default:
dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
@@ -611,10 +614,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->decode_usage = 0;
vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
- vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;
-
- if (adev->mes.resource_1) {
- vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ vf2pf_info->mes_info_addr =
+ (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
+ vf2pf_info->mes_info_size =
+ adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
}
vf2pf_info->checksum =
amd_sriov_msg_checksum(
@@ -703,6 +707,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
} else if (adev->mman.drv_vram_usage_va) {
adev->virt.fw_reserve.p_pf2vf =
(struct amd_sriov_msg_pf2vf_info_header *)
@@ -710,6 +716,8 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_vf2pf =
(struct amd_sriov_msg_vf2pf_info_header *)
(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+ adev->virt.fw_reserve.ras_telemetry =
+ (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
}
amdgpu_virt_read_pf2vf_data(adev);
@@ -732,7 +740,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
}
}
-void amdgpu_detect_virtualization(struct amdgpu_device *adev)
+static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
{
uint32_t reg;
@@ -768,8 +776,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
+ return reg;
+}
+
+static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
+{
+ bool is_sriov = false;
+
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
+ is_sriov = true;
+
switch (adev->asic_type) {
case CHIP_TONGA:
case CHIP_FIJI:
@@ -798,10 +815,39 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
amdgpu_virt_request_init_data(adev);
break;
default: /* other chip doesn't support SRIOV */
+ is_sriov = false;
DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
break;
}
}
+
+ return is_sriov;
+}
+
+static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
+{
+ ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
+ ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
+
+ ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+ ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
+ RATELIMIT_MSG_ON_RELEASE);
+
+ mutex_init(&adev->virt.ras.ras_telemetry_mutex);
+
+ adev->virt.ras.cper_rptr = 0;
+}
+
+void amdgpu_virt_init(struct amdgpu_device *adev)
+{
+ bool is_sriov = false;
+ uint32_t reg = amdgpu_virt_init_detect_asic(adev);
+
+ is_sriov = amdgpu_virt_init_req_data(adev, reg);
+
+ if (is_sriov)
+ amdgpu_virt_init_ras(adev);
}
static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
@@ -1010,6 +1056,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
void *scratch_reg2;
void *scratch_reg3;
void *spare_int;
+ unsigned long flags;
if (!adev->gfx.rlc.rlcg_reg_access_supported) {
dev_err(adev->dev,
@@ -1031,7 +1078,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
- mutex_lock(&adev->virt.rlcg_reg_lock);
+ spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
if (reg_access_ctrl->spare_int)
spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
@@ -1090,7 +1137,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
ret = readl(scratch_reg0);
- mutex_unlock(&adev->virt.rlcg_reg_lock);
+ spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
return ret;
}
@@ -1144,3 +1191,313 @@ bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
return xnack_mode;
}
+
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!amdgpu_sriov_ras_caps_en(adev))
+ return false;
+
+ if (adev->virt.ras_en_caps.bits.block_umc)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
+ if (adev->virt.ras_en_caps.bits.block_sdma)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
+ if (adev->virt.ras_en_caps.bits.block_gfx)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
+ if (adev->virt.ras_en_caps.bits.block_mmhub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
+ if (adev->virt.ras_en_caps.bits.block_athub)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
+ if (adev->virt.ras_en_caps.bits.block_pcie_bif)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
+ if (adev->virt.ras_en_caps.bits.block_hdp)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
+ if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
+ if (adev->virt.ras_en_caps.bits.block_df)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
+ if (adev->virt.ras_en_caps.bits.block_smn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
+ if (adev->virt.ras_en_caps.bits.block_sem)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
+ if (adev->virt.ras_en_caps.bits.block_mp0)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
+ if (adev->virt.ras_en_caps.bits.block_mp1)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
+ if (adev->virt.ras_en_caps.bits.block_fuse)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
+ if (adev->virt.ras_en_caps.bits.block_mca)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
+ if (adev->virt.ras_en_caps.bits.block_vcn)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
+ if (adev->virt.ras_en_caps.bits.block_jpeg)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
+ if (adev->virt.ras_en_caps.bits.block_ih)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
+ if (adev->virt.ras_en_caps.bits.block_mpio)
+ adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
+
+ if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
+ con->poison_supported = true; /* Poison is handled by host */
+
+ return true;
+}
+
+static inline enum amd_sriov_ras_telemetry_gpu_block
+amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
+ switch (block) {
+ case AMDGPU_RAS_BLOCK__UMC:
+ return RAS_TELEMETRY_GPU_BLOCK_UMC;
+ case AMDGPU_RAS_BLOCK__SDMA:
+ return RAS_TELEMETRY_GPU_BLOCK_SDMA;
+ case AMDGPU_RAS_BLOCK__GFX:
+ return RAS_TELEMETRY_GPU_BLOCK_GFX;
+ case AMDGPU_RAS_BLOCK__MMHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
+ case AMDGPU_RAS_BLOCK__ATHUB:
+ return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
+ case AMDGPU_RAS_BLOCK__PCIE_BIF:
+ return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
+ case AMDGPU_RAS_BLOCK__HDP:
+ return RAS_TELEMETRY_GPU_BLOCK_HDP;
+ case AMDGPU_RAS_BLOCK__XGMI_WAFL:
+ return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
+ case AMDGPU_RAS_BLOCK__DF:
+ return RAS_TELEMETRY_GPU_BLOCK_DF;
+ case AMDGPU_RAS_BLOCK__SMN:
+ return RAS_TELEMETRY_GPU_BLOCK_SMN;
+ case AMDGPU_RAS_BLOCK__SEM:
+ return RAS_TELEMETRY_GPU_BLOCK_SEM;
+ case AMDGPU_RAS_BLOCK__MP0:
+ return RAS_TELEMETRY_GPU_BLOCK_MP0;
+ case AMDGPU_RAS_BLOCK__MP1:
+ return RAS_TELEMETRY_GPU_BLOCK_MP1;
+ case AMDGPU_RAS_BLOCK__FUSE:
+ return RAS_TELEMETRY_GPU_BLOCK_FUSE;
+ case AMDGPU_RAS_BLOCK__MCA:
+ return RAS_TELEMETRY_GPU_BLOCK_MCA;
+ case AMDGPU_RAS_BLOCK__VCN:
+ return RAS_TELEMETRY_GPU_BLOCK_VCN;
+ case AMDGPU_RAS_BLOCK__JPEG:
+ return RAS_TELEMETRY_GPU_BLOCK_JPEG;
+ case AMDGPU_RAS_BLOCK__IH:
+ return RAS_TELEMETRY_GPU_BLOCK_IH;
+ case AMDGPU_RAS_BLOCK__MPIO:
+ return RAS_TELEMETRY_GPU_BLOCK_MPIO;
+ default:
+ DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ block);
+ return RAS_TELEMETRY_GPU_BLOCK_COUNT;
+ }
+}
+
+static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry)
+{
+ struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
+ uint32_t checksum, used_size;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return 0;
+
+ tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
+ goto out;
+
+ memcpy(&adev->virt.count_cache, tmp,
+ min(used_size, sizeof(adev->virt.count_cache)));
+out:
+ kfree(tmp);
+
+ return 0;
+}
+
+static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (!virt->ops || !virt->ops->req_ras_err_count)
+ return -EOPNOTSUPP;
+
+ /* Host allows 15 ras telemetry requests per 60 seconds. Afterwhich, the Host
+ * will ignore incoming guest messages. Ratelimit the guest messages to
+ * prevent guest self DOS.
+ */
+ if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ if (!virt->ops->req_ras_err_count(adev))
+ amdgpu_virt_cache_host_error_counts(adev,
+ virt->fw_reserve.ras_telemetry);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ }
+
+ return 0;
+}
+
+/* Bypass ACA interface and query ECC counts directly from host */
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return -EOPNOTSUPP;
+
+ /* Host Access may be lost during reset, just return last cached data. */
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, false);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
+ err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
+ err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
+
+ return 0;
+}
+
+static int
+amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
+ struct amdsriov_ras_telemetry *host_telemetry,
+ u32 *more)
+{
+ struct amd_sriov_ras_cper_dump *cper_dump = NULL;
+ struct cper_hdr *entry = NULL;
+ struct amdgpu_ring *ring = &adev->cper.ring_buf;
+ uint32_t checksum, used_size, i;
+ int ret = 0;
+
+ checksum = host_telemetry->header.checksum;
+ used_size = host_telemetry->header.used_size;
+
+ if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
+ return -EINVAL;
+
+ cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
+ if (!cper_dump)
+ return -ENOMEM;
+
+ if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *more = cper_dump->more;
+
+ if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
+ dev_warn(
+ adev->dev,
+ "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
+ adev->virt.ras.cper_rptr, cper_dump->wptr);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+ goto out;
+ }
+
+ entry = (struct cper_hdr *)&cper_dump->buf[0];
+
+ for (i = 0; i < cper_dump->count; i++) {
+ amdgpu_cper_ring_write(ring, entry, entry->record_length);
+ entry = (struct cper_hdr *)((char *)entry +
+ entry->record_length);
+ }
+
+ if (cper_dump->overflow_count)
+ dev_warn(adev->dev,
+ "host reported CPER overflow of 0x%llx entries!\n",
+ cper_dump->overflow_count);
+
+ adev->virt.ras.cper_rptr = cper_dump->wptr;
+out:
+ kfree(cper_dump);
+
+ return ret;
+}
+
+static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+ uint32_t more = 0;
+
+ if (!virt->ops || !virt->ops->req_ras_cper_dump)
+ return -EOPNOTSUPP;
+
+ do {
+ if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
+ ret = amdgpu_virt_write_cpers_to_ring(
+ adev, virt->fw_reserve.ras_telemetry, &more);
+ else
+ ret = 0;
+ } while (more && !ret);
+
+ return ret;
+}
+
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+ int ret = 0;
+
+ if (!amdgpu_sriov_ras_cper_en(adev))
+ return -EOPNOTSUPP;
+
+ if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
+ down_read_trylock(&adev->reset_domain->sem)) {
+ mutex_lock(&virt->ras.ras_telemetry_mutex);
+ ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
+ mutex_unlock(&virt->ras.ras_telemetry_mutex);
+ up_read(&adev->reset_domain->sem);
+ }
+
+ return ret;
+}
+
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
+{
+ unsigned long ue_count, ce_count;
+
+ if (amdgpu_sriov_ras_telemetry_en(adev)) {
+ amdgpu_virt_req_ras_err_count_internal(adev, true);
+ amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
+ }
+
+ return 0;
+}
+
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return false;
+
+ return true;
+}
+
+/*
+ * amdgpu_virt_request_bad_pages() - request bad pages
+ * @adev: amdgpu device.
+ * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region
+ */
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->req_bad_pages)
+ virt->ops->req_bad_pages(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b650a2032c42..577c6194db78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -95,6 +95,9 @@ struct amdgpu_virt_ops {
void (*ras_poison_handler)(struct amdgpu_device *adev,
enum amdgpu_ras_block block);
bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+ int (*req_ras_err_count)(struct amdgpu_device *adev);
+ int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
+ int (*req_bad_pages)(struct amdgpu_device *adev);
};
/*
@@ -103,6 +106,7 @@ struct amdgpu_virt_ops {
struct amdgpu_virt_fw_reserve {
struct amd_sriov_msg_pf2vf_info_header *p_pf2vf;
struct amd_sriov_msg_vf2pf_info_header *p_vf2pf;
+ void *ras_telemetry;
unsigned int checksum_key;
};
@@ -136,15 +140,20 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
/* MES info */
AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
+ AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
+ AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
+ AMDGIM_FEATURE_RAS_CPER = (1 << 11),
};
enum AMDGIM_REG_ACCESS_FLAG {
/* Use PSP to program IH_RB_CNTL */
- AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
+ AMDGIM_FEATURE_IH_REG_PSP_EN = (1 << 0),
/* Use RLC to program MMHUB regs */
- AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
+ AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1),
/* Use RLC to program GC regs */
- AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2),
+ /* Use PSP to program L1_TLB_CNTL*/
+ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3),
};
struct amdgim_pf2vf_info_v1 {
@@ -238,6 +247,13 @@ struct amdgpu_virt_ras_err_handler_data {
int last_reserved;
};
+struct amdgpu_virt_ras {
+ struct ratelimit_state ras_error_cnt_rs;
+ struct ratelimit_state ras_cper_dump_rs;
+ struct mutex ras_telemetry_mutex;
+ uint64_t cper_rptr;
+};
+
/* GPU virtualization */
struct amdgpu_virt {
uint32_t caps;
@@ -247,7 +263,10 @@ struct amdgpu_virt {
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
struct amdgpu_irq_src rcv_irq;
+
struct work_struct flr_work;
+ struct work_struct bad_pages_work;
+
struct amdgpu_mm_table mm_table;
const struct amdgpu_virt_ops *ops;
struct amdgpu_vf_error_buffer vf_errors;
@@ -275,7 +294,13 @@ struct amdgpu_virt {
/* the ucode id to signal the autoload */
uint32_t autoload_ucode_id;
- struct mutex rlcg_reg_lock;
+ /* Spinlock to protect access to the RLCG register interface */
+ spinlock_t rlcg_reg_lock;
+
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+ struct amdgpu_virt_ras ras;
+ struct amd_sriov_ras_telemetry_error_count count_cache;
};
struct amdgpu_video_codec_info;
@@ -311,6 +336,10 @@ struct amdgpu_video_codec_info;
(amdgpu_sriov_vf((adev)) && \
((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN)))
+#define amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev) \
+(amdgpu_sriov_vf((adev)) && \
+ ((adev)->virt.reg_access & (AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN)))
+
#define amdgpu_sriov_rlcg_error_report_enabled(adev) \
(amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev))
@@ -320,6 +349,18 @@ struct amdgpu_video_codec_info;
#define amdgpu_sriov_vf_mmio_access_protection(adev) \
((adev)->virt.caps & AMDGPU_VF_MMIO_ACCESS_PROTECT)
+#define amdgpu_sriov_ras_caps_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CAPS)
+
+#define amdgpu_sriov_ras_telemetry_en(adev) \
+(((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_TELEMETRY) && (adev)->virt.fw_reserve.ras_telemetry)
+
+#define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
+(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
+
+#define amdgpu_sriov_ras_cper_en(adev) \
+((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
+
static inline bool is_virtual_machine(void)
{
#if defined(CONFIG_X86)
@@ -333,6 +374,8 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_multi_vf_mode(adev) \
+ (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
#define amdgpu_sriov_is_debug(adev) \
((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
#define amdgpu_sriov_is_normal(adev) \
@@ -358,7 +401,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
-void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+void amdgpu_virt_init(struct amdgpu_device *adev);
bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
@@ -383,4 +426,12 @@ bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
u32 acc_flags, u32 hwip,
bool write, u32 *rlcg_flag);
u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
+bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
+int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
+ struct ras_err_data *err_data);
+int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
+int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
+void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index d4c2afafbb73..155bb9891a17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -188,8 +188,8 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE;
- hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate;
+ hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
return ret;
}
@@ -493,10 +493,10 @@ const struct drm_mode_config_funcs amdgpu_vkms_mode_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};
-static int amdgpu_vkms_sw_init(void *handle)
+static int amdgpu_vkms_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->amdgpu_vkms_output = kcalloc(adev->mode_info.num_crtc,
sizeof(struct amdgpu_vkms_output), GFP_KERNEL);
@@ -536,9 +536,9 @@ static int amdgpu_vkms_sw_init(void *handle)
return 0;
}
-static int amdgpu_vkms_sw_fini(void *handle)
+static int amdgpu_vkms_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i = 0;
for (i = 0; i < adev->mode_info.num_crtc; i++)
@@ -555,9 +555,9 @@ static int amdgpu_vkms_sw_fini(void *handle)
return 0;
}
-static int amdgpu_vkms_hw_init(void *handle)
+static int amdgpu_vkms_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
@@ -600,55 +600,45 @@ static int amdgpu_vkms_hw_init(void *handle)
return 0;
}
-static int amdgpu_vkms_hw_fini(void *handle)
+static int amdgpu_vkms_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int amdgpu_vkms_suspend(void *handle)
+static int amdgpu_vkms_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = drm_mode_config_helper_suspend(adev_to_drm(adev));
if (r)
return r;
- return amdgpu_vkms_hw_fini(handle);
+
+ return 0;
}
-static int amdgpu_vkms_resume(void *handle)
+static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_vkms_hw_init(handle);
+ r = amdgpu_vkms_hw_init(ip_block);
if (r)
return r;
- return drm_mode_config_helper_resume(adev_to_drm(adev));
+ return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev));
}
-static bool amdgpu_vkms_is_idle(void *handle)
+static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int amdgpu_vkms_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int amdgpu_vkms_set_clockgating_state(void *handle,
+static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int amdgpu_vkms_set_powergating_state(void *handle,
+static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -656,8 +646,6 @@ static int amdgpu_vkms_set_powergating_state(void *handle,
static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.name = "amdgpu_vkms",
- .early_init = NULL,
- .late_init = NULL,
.sw_init = amdgpu_vkms_sw_init,
.sw_fini = amdgpu_vkms_sw_fini,
.hw_init = amdgpu_vkms_hw_init,
@@ -665,12 +653,8 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.suspend = amdgpu_vkms_suspend,
.resume = amdgpu_vkms_resume,
.is_idle = amdgpu_vkms_is_idle,
- .wait_for_idle = amdgpu_vkms_wait_for_idle,
- .soft_reset = amdgpu_vkms_soft_reset,
.set_clockgating_state = amdgpu_vkms_set_clockgating_state,
.set_powergating_state = amdgpu_vkms_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6005280f5f38..3911c78f8282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -36,6 +36,7 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
+#include "amdgpu_vm.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
@@ -311,6 +312,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
}
/**
+ * amdgpu_vm_update_shared - helper to update shared memory stat
+ * @base: base structure for tracking BO usage in a VM
+ *
+ * Takes the vm status_lock and updates the shared memory stat. If the basic
+ * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
+ * as well.
+ */
+static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ uint64_t size = amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+ bool shared;
+
+ spin_lock(&vm->status_lock);
+ shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ if (base->shared != shared) {
+ base->shared = shared;
+ if (shared) {
+ vm->stats[bo_memtype].drm.shared += size;
+ vm->stats[bo_memtype].drm.private -= size;
+ } else {
+ vm->stats[bo_memtype].drm.shared -= size;
+ vm->stats[bo_memtype].drm.private += size;
+ }
+ }
+ spin_unlock(&vm->status_lock);
+}
+
+/**
+ * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
+ * @bo: amdgpu buffer object
+ *
+ * Update the per VM stats for all the vm if needed from private to shared or
+ * vice versa.
+ */
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_bo_base *base;
+
+ for (base = bo->vm_bo; base; base = base->next)
+ amdgpu_vm_update_shared(base);
+}
+
+/**
+ * amdgpu_vm_update_stats_locked - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Caller need to have the vm status_lock held. Useful for when multiple update
+ * need to happen at the same time.
+ */
+static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+ struct amdgpu_bo *bo = base->bo;
+ int64_t size = sign * amdgpu_bo_size(bo);
+ uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
+
+ /* For drm-total- and drm-shared-, BO are accounted by their preferred
+ * placement, see also amdgpu_bo_mem_stats_placement.
+ */
+ if (base->shared)
+ vm->stats[bo_memtype].drm.shared += size;
+ else
+ vm->stats[bo_memtype].drm.private += size;
+
+ if (res && res->mem_type < __AMDGPU_PL_NUM) {
+ uint32_t res_memtype = res->mem_type;
+
+ vm->stats[res_memtype].drm.resident += size;
+ /* BO only count as purgeable if it is resident,
+ * since otherwise there's nothing to purge.
+ */
+ if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
+ vm->stats[res_memtype].drm.purgeable += size;
+ if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
+ vm->stats[bo_memtype].evicted += size;
+ }
+}
+
+/**
+ * amdgpu_vm_update_stats - helper to update normal memory stat
+ * @base: base structure for tracking BO usage in a VM
+ * @res: the ttm_resource to use for the purpose of accounting, may or may not
+ * be bo->tbo.resource
+ * @sign: if we should add (+1) or subtract (-1) from the stat
+ *
+ * Updates the basic memory stat when bo is added/deleted/moved.
+ */
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *res, int sign)
+{
+ struct amdgpu_vm *vm = base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(base, res, sign);
+ spin_unlock(&vm->status_lock);
+}
+
+/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
* @base: base structure for tracking BO usage in a VM
@@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
+ spin_lock(&vm->status_lock);
+ base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
+ amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
+ spin_unlock(&vm->status_lock);
+
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
@@ -643,6 +754,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
@@ -650,8 +762,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool gds_switch_needed = ring->funcs->emit_gds_switch &&
job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
- struct dma_fence *fence = NULL;
+ bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
+ struct dma_fence *fence = NULL;
unsigned int patch;
int r;
@@ -674,12 +787,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (adev->gfx.enable_cleaner_shader &&
- ring->funcs->emit_cleaner_shader &&
- job->enforce_isolation)
- ring->funcs->emit_cleaner_shader(ring);
+ cleaner_shader_needed = job->run_cleaner_shader &&
+ adev->gfx.enable_cleaner_shader &&
+ ring->funcs->emit_cleaner_shader && job->base.s_fence &&
+ &job->base.s_fence->scheduled == isolation->spearhead;
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
+ !cleaner_shader_needed)
return 0;
amdgpu_ring_ib_begin(ring);
@@ -690,6 +804,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
+ if (cleaner_shader_needed)
+ ring->funcs->emit_cleaner_shader(ring);
+
if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
@@ -701,7 +818,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
- if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+ if (ring->funcs->emit_gds_switch &&
gds_switch_needed) {
amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
job->gds_size, job->gws_base,
@@ -709,7 +826,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
job->oa_size);
}
- if (vm_flush_needed || pasid_mapping_needed) {
+ if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
r = amdgpu_fence_emit(ring, &fence, NULL, 0);
if (r)
return r;
@@ -731,6 +848,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
id->pasid_mapping = dma_fence_get(fence);
mutex_unlock(&id_mgr->lock);
}
+
+ /*
+ * Make sure that all other submissions wait for the cleaner shader to
+ * finish before we push them to the HW.
+ */
+ if (cleaner_shader_needed) {
+ trace_amdgpu_cleaner_shader(ring, fence);
+ mutex_lock(&adev->enforce_isolation_mutex);
+ dma_fence_put(isolation->spearhead);
+ isolation->spearhead = dma_fence_get(fence);
+ mutex_unlock(&adev->enforce_isolation_mutex);
+ }
dma_fence_put(fence);
amdgpu_ring_patch_cond_exec(ring, patch);
@@ -1082,51 +1211,11 @@ error_free:
return r;
}
-static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
- struct amdgpu_mem_stats *stats)
-{
- struct amdgpu_vm *vm = bo_va->base.vm;
- struct amdgpu_bo *bo = bo_va->base.bo;
-
- if (!bo)
- return;
-
- /*
- * For now ignore BOs which are currently locked and potentially
- * changing their location.
- */
- if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
- !dma_resv_trylock(bo->tbo.base.resv))
- return;
-
- amdgpu_bo_get_memory(bo, stats);
- if (!amdgpu_vm_is_bo_always_valid(vm, bo))
- dma_resv_unlock(bo->tbo.base.resv);
-}
-
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats)
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
{
- struct amdgpu_bo_va *bo_va, *tmp;
-
spin_lock(&vm->status_lock);
- list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
-
- list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status)
- amdgpu_vm_bo_get_memory(bo_va, stats);
+ memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
spin_unlock(&vm->status_lock);
}
@@ -1159,7 +1248,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
int r;
amdgpu_sync_create(&sync);
- if (clear || !bo) {
+ if (clear) {
mem = NULL;
/* Implicitly sync to command submissions in the same VM before
@@ -1174,6 +1263,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
if (r)
goto error_free;
}
+ } else if (!bo) {
+ mem = NULL;
+
+ /* PRT map operations don't need to sync to anything. */
} else {
struct drm_gem_object *obj = &bo->tbo.base;
@@ -1259,10 +1352,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
* next command submission.
*/
if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
- uint32_t mem_type = bo->tbo.resource->mem_type;
-
- if (!(bo->preferred_domains &
- amdgpu_mem_type_to_domain(mem_type)))
+ if (bo->tbo.resource &&
+ !(bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
amdgpu_vm_bo_evicted(&bo_va->base);
else
amdgpu_vm_bo_idle(&bo_va->base);
@@ -2069,6 +2161,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (*base != &bo_va->base)
continue;
+ amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
*base = bo_va->base.next;
break;
}
@@ -2137,14 +2230,12 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
/**
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
- * @adev: amdgpu_device pointer
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted)
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
{
struct amdgpu_vm_bo_base *bo_base;
@@ -2170,6 +2261,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_bo_move - handle BO move
+ *
+ * @bo: amdgpu buffer object
+ * @new_mem: the new placement of the BO move
+ * @evicted: is the BO evicted
+ *
+ * Update the memory stats for the new placement and mark @bo as invalid.
+ */
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted)
+{
+ struct amdgpu_vm_bo_base *bo_base;
+
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ spin_lock(&vm->status_lock);
+ amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
+ amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
+ spin_unlock(&vm->status_lock);
+ }
+
+ amdgpu_vm_bo_invalidate(bo, evicted);
+}
+
+/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
@@ -2434,8 +2551,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
- INIT_LIST_HEAD(&vm->pt_freed);
- INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
INIT_KFIFO(vm->faults);
r = amdgpu_vm_init_entities(adev, vm);
@@ -2574,18 +2689,14 @@ unreserve_bo:
return r;
}
-/**
- * amdgpu_vm_release_compute - release a compute vm
- * @adev: amdgpu_device pointer
- * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
- *
- * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
- * pasid from vm. Compute should stop use of vm after this call.
- */
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
{
- amdgpu_vm_set_pasid(adev, vm, 0);
- vm->is_compute_context = false;
+ for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
+ if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
+ vm->stats[i].evicted == 0))
+ return false;
+ }
+ return true;
}
/**
@@ -2607,11 +2718,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
- flush_work(&vm->pt_free_work);
-
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
- amdgpu_vm_put_task_info(vm->task_info);
amdgpu_vm_set_pasid(adev, vm, 0);
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
@@ -2660,6 +2768,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
+
+ if (!amdgpu_vm_stats_is_zero(vm)) {
+ struct amdgpu_task_info *ti = vm->task_info;
+
+ dev_warn(adev->dev,
+ "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
+ ti->process_name, ti->pid, ti->task_name, ti->tgid);
+ }
+
+ amdgpu_vm_put_task_info(vm->task_info);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 52dd7cdfdc81..f3ad687125ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -35,6 +35,7 @@
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"
+#include "amdgpu_ttm.h"
struct drm_exec;
@@ -42,7 +43,6 @@ struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
struct amdgpu_bo_vm;
-struct amdgpu_mem_stats;
/*
* GPUVM handling
@@ -203,9 +203,13 @@ struct amdgpu_vm_bo_base {
/* protected by bo being reserved */
struct amdgpu_vm_bo_base *next;
- /* protected by spinlock */
+ /* protected by vm status_lock */
struct list_head vm_status;
+ /* if the bo is counted as shared in mem stats
+ * protected by vm status_lock */
+ bool shared;
+
/* protected by the BO being reserved */
bool moved;
};
@@ -322,6 +326,13 @@ struct amdgpu_vm_fault_info {
unsigned int vmhub;
};
+struct amdgpu_mem_stats {
+ struct drm_memory_stats drm;
+
+ /* buffers that requested this placement but are currently evicted */
+ uint64_t evicted;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -336,6 +347,9 @@ struct amdgpu_vm {
/* Lock to protect vm_bo add/del/move on all lists of vm */
spinlock_t status_lock;
+ /* Memory statistics for this vm, protected by status_lock */
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM];
+
/* Per-VM and PT BOs who needs a validation */
struct list_head evicted;
@@ -360,10 +374,6 @@ struct amdgpu_vm {
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
- /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
- struct list_head pt_freed;
- struct work_struct pt_free_work;
-
/* contains the page directory */
struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
@@ -479,7 +489,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
unsigned int num_fences);
@@ -515,8 +524,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
- struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted);
+void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
+ struct ttm_resource *new_res, int sign);
+void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo);
+void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
+ bool evicted);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
@@ -567,7 +580,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
- struct amdgpu_mem_stats *stats);
+ struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]);
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_vm *vmbo, bool immediate);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index f78a0434a48f..30022123b0bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
if (!entry->bo)
return;
+ amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
entry->bo->vm_bo = NULL;
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
@@ -546,27 +547,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
amdgpu_bo_unref(&entry->bo);
}
-void amdgpu_vm_pt_free_work(struct work_struct *work)
-{
- struct amdgpu_vm_bo_base *entry, *next;
- struct amdgpu_vm *vm;
- LIST_HEAD(pt_freed);
-
- vm = container_of(work, struct amdgpu_vm, pt_free_work);
-
- spin_lock(&vm->status_lock);
- list_splice_init(&vm->pt_freed, &pt_freed);
- spin_unlock(&vm->status_lock);
-
- /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
- amdgpu_bo_reserve(vm->root.bo, true);
-
- list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
- amdgpu_vm_pt_free(entry);
-
- amdgpu_bo_unreserve(vm->root.bo);
-}
-
/**
* amdgpu_vm_pt_free_list - free PD/PT levels
*
@@ -579,19 +559,15 @@ void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm_update_params *params)
{
struct amdgpu_vm_bo_base *entry, *next;
- struct amdgpu_vm *vm = params->vm;
bool unlocked = params->unlocked;
if (list_empty(&params->tlb_flush_waitlist))
return;
- if (unlocked) {
- spin_lock(&vm->status_lock);
- list_splice_init(&params->tlb_flush_waitlist, &vm->pt_freed);
- spin_unlock(&vm->status_lock);
- schedule_work(&vm->pt_free_work);
- return;
- }
+ /*
+ * unlocked unmap clear page table leaves, warning to free the page entry.
+ */
+ WARN_ON(unlocked);
list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
amdgpu_vm_pt_free(entry);
@@ -899,7 +875,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = ((uint64_t)mask + 1) << shift;
+
+ if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
+ /*
+ * MMU notifier callback unlocked unmap huge page, leave is PDE entry,
+ * only clear one entry. Next entry search again for PDE or PTE leave.
+ */
+ entry_end = 1ULL << shift;
+ else
+ entry_end = ((uint64_t)mask + 1) << shift;
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 5acd20ff5979..121ee17b522b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -236,7 +236,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
int ret;
amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
- ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix);
+ ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s.bin", fw_prefix);
if (ret)
goto out;
@@ -295,9 +296,9 @@ int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
return 0;
}
-static int vpe_early_init(void *handle)
+static int vpe_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
@@ -356,9 +357,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe)
return 0;
}
-static int vpe_sw_init(void *handle)
+static int vpe_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
int ret;
@@ -377,18 +378,26 @@ static int vpe_sw_init(void *handle)
ret = vpe_init_microcode(vpe);
if (ret)
goto out;
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vpe.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vpe.ring);
+ ret = amdgpu_vpe_sysfs_reset_mask_init(adev);
+ if (ret)
+ goto out;
out:
return ret;
}
-static int vpe_sw_fini(void *handle)
+static int vpe_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
release_firmware(vpe->fw);
vpe->fw = NULL;
+ amdgpu_vpe_sysfs_reset_mask_fini(adev);
vpe_ring_fini(vpe);
amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
@@ -398,9 +407,9 @@ static int vpe_sw_fini(void *handle)
return 0;
}
-static int vpe_hw_init(void *handle)
+static int vpe_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
int ret;
@@ -421,9 +430,9 @@ static int vpe_hw_init(void *handle)
return 0;
}
-static int vpe_hw_fini(void *handle)
+static int vpe_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
vpe_ring_stop(vpe);
@@ -434,20 +443,18 @@ static int vpe_hw_fini(void *handle)
return 0;
}
-static int vpe_suspend(void *handle)
+static int vpe_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vpe.idle_work);
- return vpe_hw_fini(adev);
+ return vpe_hw_fini(ip_block);
}
-static int vpe_resume(void *handle)
+static int vpe_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vpe_hw_init(adev);
+ return vpe_hw_init(ip_block);
}
static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -640,16 +647,16 @@ static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
-static int vpe_set_clockgating_state(void *handle,
+static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int vpe_set_powergating_state(void *handle,
+static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_vpe *vpe = &adev->vpe;
if (!adev->pm.dpm_enabled)
@@ -827,7 +834,7 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -867,6 +874,43 @@ static void vpe_ring_end_use(struct amdgpu_ring *ring)
schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT);
}
+static ssize_t amdgpu_get_vpe_reset_mask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (!adev)
+ return -ENODEV;
+
+ return amdgpu_show_reset_mask(buf, adev->vpe.supported_reset);
+}
+
+static DEVICE_ATTR(vpe_reset_mask, 0444,
+ amdgpu_get_vpe_reset_mask, NULL);
+
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->vpe.num_instances) {
+ r = device_create_file(adev->dev, &dev_attr_vpe_reset_mask);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev)
+{
+ if (adev->dev->kobj.sd) {
+ if (adev->vpe.num_instances)
+ device_remove_file(adev->dev, &dev_attr_vpe_reset_mask);
+ }
+}
+
static const struct amdgpu_ring_funcs vpe_ring_funcs = {
.type = AMDGPU_RING_TYPE_VPE,
.align_mask = 0xf,
@@ -908,14 +952,12 @@ static void vpe_set_ring_funcs(struct amdgpu_device *adev)
const struct amd_ip_funcs vpe_ip_funcs = {
.name = "vpe_v6_1",
.early_init = vpe_early_init,
- .late_init = NULL,
.sw_init = vpe_sw_init,
.sw_fini = vpe_sw_fini,
.hw_init = vpe_hw_init,
.hw_fini = vpe_hw_fini,
.suspend = vpe_suspend,
.resume = vpe_resume,
- .soft_reset = NULL,
.set_clockgating_state = vpe_set_clockgating_state,
.set_powergating_state = vpe_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
index 231d86d0953e..695da740a97e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h
@@ -79,6 +79,7 @@ struct amdgpu_vpe {
uint32_t num_instances;
bool collaborate_mode;
+ uint32_t supported_reset;
};
int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev);
@@ -86,6 +87,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe);
int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe);
int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe);
int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe);
+void amdgpu_vpe_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+int amdgpu_vpe_sysfs_reset_mask_init(struct amdgpu_device *adev);
#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0)
#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 7d26a962f811..abdc52b0895a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -24,11 +24,11 @@
#include <linux/dma-mapping.h>
#include <drm/ttm/ttm_range_manager.h>
+#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_res_cursor.h"
-#include "amdgpu_atomfirmware.h"
#include "atom.h"
#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
@@ -463,7 +463,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
int r;
lpfn = (u64)place->lpfn << PAGE_SHIFT;
- if (!lpfn)
+ if (!lpfn || lpfn > man->size)
lpfn = man->size;
fpfn = (u64)place->fpfn << PAGE_SHIFT;
@@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
remaining_size -= size;
}
- mutex_unlock(&mgr->lock);
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
struct drm_buddy_block *dcc_block;
@@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
(u64)vres->base.size,
&vres->blocks);
}
+ mutex_unlock(&mgr->lock);
vres->base.start = 0;
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
@@ -908,6 +908,9 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
struct ttm_resource_manager *man = &mgr->manager;
int err;
+ man->cg = drmm_cgroup_register_region(adev_to_drm(adev), "vram", adev->gmc.real_vram_size);
+ if (IS_ERR(man->cg))
+ return PTR_ERR(man->cg);
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index a6d456ec6aeb..322816805bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -27,6 +27,9 @@
#include <drm/drm_drv.h>
#include "../amdxcp/amdgpu_xcp_drv.h"
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr);
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr);
+
static int __amdgpu_xcp_run(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp_ip *xcp_ip, int xcp_state)
{
@@ -189,7 +192,7 @@ static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
goto out;
}
-
+ amdgpu_xcp_sysfs_entries_update(xcp_mgr);
out:
mutex_unlock(&xcp_mgr->xcp_lock);
@@ -263,9 +266,10 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
if (ret == -ENOSPC) {
dev_warn(adev->dev,
"Skip xcp node #%d when out of drm node resource.", i);
- return 0;
+ ret = 0;
+ goto out;
} else if (ret) {
- return ret;
+ goto out;
}
/* Redirect all IOCTLs to the primary device */
@@ -278,9 +282,14 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
p_ddev->vma_offset_manager = ddev->vma_offset_manager;
p_ddev->driver = &amdgpu_partition_driver;
adev->xcp_mgr->xcp[i].ddev = p_ddev;
+
+ dev_set_drvdata(p_ddev->dev, &adev->xcp_mgr->xcp[i]);
}
+ ret = 0;
+out:
+ amdgpu_xcp_sysfs_entries_init(adev->xcp_mgr);
- return 0;
+ return ret;
}
int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
@@ -288,6 +297,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
struct amdgpu_xcp_mgr_funcs *xcp_funcs)
{
struct amdgpu_xcp_mgr *xcp_mgr;
+ int i;
if (!xcp_funcs || !xcp_funcs->get_ip_details)
return -EINVAL;
@@ -306,6 +316,8 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode,
amdgpu_xcp_init(xcp_mgr, init_num_xcps, init_mode);
adev->xcp_mgr = xcp_mgr;
+ for (i = 0; i < MAX_XCP; ++i)
+ xcp_mgr->xcp[i].xcp_mgr = xcp_mgr;
return amdgpu_xcp_dev_alloc(adev);
}
@@ -427,9 +439,420 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
return;
sched = entity->entity.rq->sched;
- if (sched->ready) {
+ if (drm_sched_wqueue_ready(sched)) {
ring = to_amdgpu_ring(entity->entity.rq->sched);
atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt);
}
}
+/*====================== xcp sysfs - configuration ======================*/
+#define XCP_CFG_SYSFS_RES_ATTR_SHOW(_name) \
+ static ssize_t amdgpu_xcp_res_sysfs_##_name##_show( \
+ struct amdgpu_xcp_res_details *xcp_res, char *buf) \
+ { \
+ return sysfs_emit(buf, "%d\n", xcp_res->_name); \
+ }
+
+struct amdgpu_xcp_res_sysfs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct amdgpu_xcp_res_details *xcp_res, char *buf);
+};
+
+#define XCP_CFG_SYSFS_RES_ATTR(_name) \
+ struct amdgpu_xcp_res_sysfs_attribute xcp_res_sysfs_attr_##_name = { \
+ .attr = { .name = __stringify(_name), .mode = 0400 }, \
+ .show = amdgpu_xcp_res_sysfs_##_name##_show, \
+ }
+
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_inst)
+XCP_CFG_SYSFS_RES_ATTR(num_inst);
+XCP_CFG_SYSFS_RES_ATTR_SHOW(num_shared)
+XCP_CFG_SYSFS_RES_ATTR(num_shared);
+
+#define XCP_CFG_SYSFS_RES_ATTR_PTR(_name) xcp_res_sysfs_attr_##_name.attr
+
+static struct attribute *xcp_cfg_res_sysfs_attrs[] = {
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_inst),
+ &XCP_CFG_SYSFS_RES_ATTR_PTR(num_shared), NULL
+};
+
+static const char *xcp_desc[] = {
+ [AMDGPU_SPX_PARTITION_MODE] = "SPX",
+ [AMDGPU_DPX_PARTITION_MODE] = "DPX",
+ [AMDGPU_TPX_PARTITION_MODE] = "TPX",
+ [AMDGPU_QPX_PARTITION_MODE] = "QPX",
+ [AMDGPU_CPX_PARTITION_MODE] = "CPX",
+};
+
+static const char *nps_desc[] = {
+ [UNKNOWN_MEMORY_PARTITION_MODE] = "UNKNOWN",
+ [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
+ [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
+ [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
+ [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
+ [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
+ [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
+};
+
+ATTRIBUTE_GROUPS(xcp_cfg_res_sysfs);
+
+#define to_xcp_attr(x) \
+ container_of(x, struct amdgpu_xcp_res_sysfs_attribute, attr)
+#define to_xcp_res(x) container_of(x, struct amdgpu_xcp_res_details, kobj)
+
+static ssize_t xcp_cfg_res_sysfs_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_res_sysfs_attribute *attribute;
+ struct amdgpu_xcp_res_details *xcp_res;
+
+ attribute = to_xcp_attr(attr);
+ xcp_res = to_xcp_res(kobj);
+
+ if (!attribute->show)
+ return -EIO;
+
+ return attribute->show(xcp_res, buf);
+}
+
+static const struct sysfs_ops xcp_cfg_res_sysfs_ops = {
+ .show = xcp_cfg_res_sysfs_attr_show,
+};
+
+static const struct kobj_type xcp_cfg_res_sysfs_ktype = {
+ .sysfs_ops = &xcp_cfg_res_sysfs_ops,
+ .default_groups = xcp_cfg_res_sysfs_groups,
+};
+
+const char *xcp_res_names[] = {
+ [AMDGPU_XCP_RES_XCC] = "xcc",
+ [AMDGPU_XCP_RES_DMA] = "dma",
+ [AMDGPU_XCP_RES_DEC] = "dec",
+ [AMDGPU_XCP_RES_JPEG] = "jpeg",
+};
+
+static int amdgpu_xcp_get_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ if (xcp_mgr->funcs && xcp_mgr->funcs->get_xcp_res_info)
+ return xcp_mgr->funcs->get_xcp_res_info(xcp_mgr, mode, xcp_cfg);
+
+ return -EOPNOTSUPP;
+}
+
+#define to_xcp_cfg(x) container_of(x, struct amdgpu_xcp_cfg, kobj)
+static ssize_t supported_xcp_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr = xcp_cfg->xcp_mgr;
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_mgr || !xcp_mgr->supp_xcp_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t supported_nps_configs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int size = 0, mode;
+ char *sep = "";
+
+ if (!xcp_cfg || !xcp_cfg->compatible_nps_modes)
+ return sysfs_emit(buf, "Not supported\n");
+
+ for_each_inst(mode, xcp_cfg->compatible_nps_modes) {
+ size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
+ sep = ", ";
+ }
+
+ size += sysfs_emit_at(buf, size, "\n");
+
+ return size;
+}
+
+static ssize_t xcp_config_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ return sysfs_emit(buf, "%s\n",
+ amdgpu_gfx_compute_mode_desc(xcp_cfg->mode));
+}
+
+static ssize_t xcp_config_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t size)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+ int mode, r;
+
+ if (!strncasecmp("SPX", buf, strlen("SPX")))
+ mode = AMDGPU_SPX_PARTITION_MODE;
+ else if (!strncasecmp("DPX", buf, strlen("DPX")))
+ mode = AMDGPU_DPX_PARTITION_MODE;
+ else if (!strncasecmp("TPX", buf, strlen("TPX")))
+ mode = AMDGPU_TPX_PARTITION_MODE;
+ else if (!strncasecmp("QPX", buf, strlen("QPX")))
+ mode = AMDGPU_QPX_PARTITION_MODE;
+ else if (!strncasecmp("CPX", buf, strlen("CPX")))
+ mode = AMDGPU_CPX_PARTITION_MODE;
+ else
+ return -EINVAL;
+
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+
+ if (r)
+ return r;
+
+ xcp_cfg->mode = mode;
+ return size;
+}
+
+static struct kobj_attribute xcp_cfg_sysfs_mode =
+ __ATTR_RW_MODE(xcp_config, 0644);
+
+static void xcp_cfg_sysfs_release(struct kobject *kobj)
+{
+ struct amdgpu_xcp_cfg *xcp_cfg = to_xcp_cfg(kobj);
+
+ kfree(xcp_cfg);
+}
+
+static const struct kobj_type xcp_cfg_sysfs_ktype = {
+ .release = xcp_cfg_sysfs_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static struct kobj_attribute supp_part_sysfs_mode =
+ __ATTR_RO(supported_xcp_configs);
+
+static struct kobj_attribute supp_nps_sysfs_mode =
+ __ATTR_RO(supported_nps_configs);
+
+static const struct attribute *xcp_attrs[] = {
+ &supp_part_sysfs_mode.attr,
+ &xcp_cfg_sysfs_mode.attr,
+ NULL,
+};
+
+static void amdgpu_xcp_cfg_sysfs_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i, r, j, rid, mode;
+
+ if (!adev->xcp_mgr)
+ return;
+
+ xcp_cfg = kzalloc(sizeof(*xcp_cfg), GFP_KERNEL);
+ if (!xcp_cfg)
+ return;
+ xcp_cfg->xcp_mgr = adev->xcp_mgr;
+
+ r = kobject_init_and_add(&xcp_cfg->kobj, &xcp_cfg_sysfs_ktype,
+ &adev->dev->kobj, "compute_partition_config");
+ if (r)
+ goto err1;
+
+ r = sysfs_create_files(&xcp_cfg->kobj, xcp_attrs);
+ if (r)
+ goto err1;
+
+ if (adev->gmc.supported_nps_modes != 0) {
+ r = sysfs_create_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ if (r) {
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+ }
+
+ mode = (xcp_cfg->xcp_mgr->mode ==
+ AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) ?
+ AMDGPU_SPX_PARTITION_MODE :
+ xcp_cfg->xcp_mgr->mode;
+ r = amdgpu_xcp_get_res_info(xcp_cfg->xcp_mgr, mode, xcp_cfg);
+ if (r) {
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ goto err1;
+ }
+
+ xcp_cfg->mode = mode;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ rid = xcp_res->id;
+ r = kobject_init_and_add(&xcp_res->kobj,
+ &xcp_cfg_res_sysfs_ktype,
+ &xcp_cfg->kobj, "%s",
+ xcp_res_names[rid]);
+ if (r)
+ goto err;
+ }
+
+ adev->xcp_mgr->xcp_cfg = xcp_cfg;
+ return;
+err:
+ for (j = 0; j < i; j++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+err1:
+ kobject_put(&xcp_cfg->kobj);
+}
+
+static void amdgpu_xcp_cfg_sysfs_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_xcp_res_details *xcp_res;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ int i;
+
+ if (!adev->xcp_mgr || !adev->xcp_mgr->xcp_cfg)
+ return;
+
+ xcp_cfg = adev->xcp_mgr->xcp_cfg;
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ xcp_res = &xcp_cfg->xcp_res[i];
+ kobject_put(&xcp_res->kobj);
+ }
+
+ sysfs_remove_file(&xcp_cfg->kobj, &supp_nps_sysfs_mode.attr);
+ sysfs_remove_files(&xcp_cfg->kobj, xcp_attrs);
+ kobject_put(&xcp_cfg->kobj);
+}
+
+/*====================== xcp sysfs - data entries ======================*/
+
+#define to_xcp(x) container_of(x, struct amdgpu_xcp, kobj)
+
+static ssize_t xcp_metrics_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ ssize_t size;
+
+ xcp_mgr = xcp->xcp_mgr;
+ size = amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, NULL);
+ if (size <= 0)
+ return size;
+
+ if (size > PAGE_SIZE)
+ return -ENOSPC;
+
+ return amdgpu_dpm_get_xcp_metrics(xcp_mgr->adev, xcp->id, buf);
+}
+
+static umode_t amdgpu_xcp_attrs_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct amdgpu_xcp *xcp = to_xcp(kobj);
+
+ if (!xcp || !xcp->valid)
+ return 0;
+
+ return attr->mode;
+}
+
+static struct kobj_attribute xcp_sysfs_metrics = __ATTR_RO(xcp_metrics);
+
+static struct attribute *amdgpu_xcp_attrs[] = {
+ &xcp_sysfs_metrics.attr,
+ NULL,
+};
+
+static const struct attribute_group amdgpu_xcp_attrs_group = {
+ .attrs = amdgpu_xcp_attrs,
+ .is_visible = amdgpu_xcp_attrs_is_visible
+};
+
+static const struct kobj_type xcp_sysfs_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+static void amdgpu_xcp_sysfs_entries_fini(struct amdgpu_xcp_mgr *xcp_mgr, int n)
+{
+ struct amdgpu_xcp *xcp;
+
+ for (n--; n >= 0; n--) {
+ xcp = &xcp_mgr->xcp[n];
+ if (!xcp->ddev || !xcp->valid)
+ continue;
+ sysfs_remove_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ kobject_put(&xcp->kobj);
+ }
+}
+
+static void amdgpu_xcp_sysfs_entries_init(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i, r;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ break;
+ r = kobject_init_and_add(&xcp->kobj, &xcp_sysfs_ktype,
+ &xcp->ddev->dev->kobj, "xcp");
+ if (r)
+ goto out;
+
+ r = sysfs_create_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ if (r)
+ goto out;
+ }
+
+ return;
+out:
+ kobject_put(&xcp->kobj);
+}
+
+static void amdgpu_xcp_sysfs_entries_update(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_xcp *xcp;
+ int i;
+
+ for (i = 0; i < MAX_XCP; i++) {
+ /* Redirect all IOCTLs to the primary device */
+ xcp = &xcp_mgr->xcp[i];
+ if (!xcp->ddev)
+ continue;
+ sysfs_update_group(&xcp->kobj, &amdgpu_xcp_attrs_group);
+ }
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+
+ amdgpu_xcp_cfg_sysfs_init(adev);
+
+ return;
+}
+
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!adev->xcp_mgr)
+ return;
+ amdgpu_xcp_sysfs_entries_fini(adev->xcp_mgr, MAX_XCP);
+ amdgpu_xcp_cfg_sysfs_fini(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 32775260556f..454b33f889fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -56,6 +56,30 @@ enum AMDGPU_XCP_STATE {
AMDGPU_XCP_RESUME,
};
+enum amdgpu_xcp_res_id {
+ AMDGPU_XCP_RES_XCC,
+ AMDGPU_XCP_RES_DMA,
+ AMDGPU_XCP_RES_DEC,
+ AMDGPU_XCP_RES_JPEG,
+ AMDGPU_XCP_RES_MAX,
+};
+
+struct amdgpu_xcp_res_details {
+ enum amdgpu_xcp_res_id id;
+ u8 num_inst;
+ u8 num_shared;
+ struct kobject kobj;
+};
+
+struct amdgpu_xcp_cfg {
+ u8 mode;
+ struct amdgpu_xcp_res_details xcp_res[AMDGPU_XCP_RES_MAX];
+ u8 num_res;
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
+ u16 compatible_nps_modes;
+};
+
struct amdgpu_xcp_ip_funcs {
int (*prepare_suspend)(void *handle, uint32_t inst_mask);
int (*suspend)(void *handle, uint32_t inst_mask);
@@ -84,6 +108,8 @@ struct amdgpu_xcp {
struct drm_driver *driver;
struct drm_vma_offset_manager *vma_offset_manager;
struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
+ struct amdgpu_xcp_mgr *xcp_mgr;
+ struct kobject kobj;
};
struct amdgpu_xcp_mgr {
@@ -97,6 +123,9 @@ struct amdgpu_xcp_mgr {
/* Used to determine KFD memory size limits per XCP */
unsigned int num_xcp_per_mem_partition;
+ struct amdgpu_xcp_cfg *xcp_cfg;
+ uint32_t supp_xcp_modes;
+ uint32_t avail_xcp_modes;
};
struct amdgpu_xcp_mgr_funcs {
@@ -108,7 +137,9 @@ struct amdgpu_xcp_mgr_funcs {
struct amdgpu_xcp_ip *ip);
int (*get_xcp_mem_id)(struct amdgpu_xcp_mgr *xcp_mgr,
struct amdgpu_xcp *xcp, uint8_t *mem_id);
-
+ int (*get_xcp_res_info)(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg);
int (*prepare_suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*suspend)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
int (*prepare_resume)(struct amdgpu_xcp_mgr *xcp_mgr, int xcp_id);
@@ -146,6 +177,9 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
void amdgpu_xcp_release_sched(struct amdgpu_device *adev,
struct amdgpu_ctx_entity *entity);
+void amdgpu_xcp_sysfs_init(struct amdgpu_device *adev);
+void amdgpu_xcp_sysfs_fini(struct amdgpu_device *adev);
+
#define amdgpu_xcp_select_scheds(adev, e, c, d, x, y) \
((adev)->xcp_mgr && (adev)->xcp_mgr->funcs && \
(adev)->xcp_mgr->funcs->select_scheds ? \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 7de449fae1e3..d9ad37711c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,11 @@
#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
+
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
@@ -289,6 +294,72 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
};
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num)
+{
+ int link_map_6_4_x[8] = { 0, 3, 1, 2, 7, 6, 4, 5 };
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ if (link_num < ARRAY_SIZE(link_map_6_4_x))
+ return link_map_6_4_x[link_num];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
+
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ const u32 smn_xgmi_6_4_pcs_state_hist1[2] = { 0x11a00070, 0x11b00070 };
+ const u32 smn_xgmi_6_4_1_pcs_state_hist1[2] = { 0x12100070,
+ 0x11b00070 };
+ u32 i, n;
+ u64 addr;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ n = ARRAY_SIZE(smn_xgmi_6_4_pcs_state_hist1);
+ addr = smn_xgmi_6_4_pcs_state_hist1[global_link_num % n];
+ break;
+ case IP_VERSION(6, 4, 1):
+ n = ARRAY_SIZE(smn_xgmi_6_4_1_pcs_state_hist1);
+ addr = smn_xgmi_6_4_1_pcs_state_hist1[global_link_num % n];
+ break;
+ default:
+ return U32_MAX;
+ }
+
+ i = global_link_num / n;
+ addr += adev->asic_funcs->encode_ext_smn_addressing(i);
+
+ return RREG32_PCIE_EXT(addr);
+}
+
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+ u32 xgmi_state_reg_val;
+
+ switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+ case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
+ xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE)
+ return -ENOLINK;
+
+ if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0)
+ return XGMI_LINK_ACTIVE;
+
+ return XGMI_LINK_INACTIVE;
+}
+
/**
* DOC: AMDGPU XGMI Support
*
@@ -667,6 +738,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
task_barrier_init(&hive->tb);
hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
hive->hi_req_gpu = NULL;
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
/*
* hive pstate on boot is high in vega20 so we have to go to low
@@ -776,28 +848,88 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
* num_hops[2:0] = number of hops
*/
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
uint8_t num_hops_mask = 0x7;
int i;
+ if (!adev->gmc.xgmi.supported)
+ return 0;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
return top->nodes[i].num_hops & num_hops_mask;
- return -EINVAL;
+
+ dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+
+ return 0;
}
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev)
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw)
+{
+ bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER;
+ int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1;
+ int num_lanes = adev->gmc.xgmi.max_width;
+ int speed = adev->gmc.xgmi.max_speed;
+ int num_links = !peer_mode ? 1 : -1;
+
+ if (!(min_bw && max_bw))
+ return -EINVAL;
+
+ *min_bw = 0;
+ *max_bw = 0;
+
+ if (!adev->gmc.xgmi.supported)
+ return -ENODATA;
+
+ if (peer_mode && !peer_adev)
+ return -EINVAL;
+
+ if (peer_mode) {
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i) {
+ if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id)
+ continue;
+
+ num_links = top->nodes[i].num_links;
+ break;
+ }
+ }
+
+ if (num_links == -1) {
+ dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n",
+ peer_adev->gmc.xgmi.physical_node_id);
+ } else if (num_links) {
+ int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE;
+
+ *min_bw = per_link_bw;
+ *max_bw = num_links * per_link_bw;
+ }
+
+ return 0;
+}
+
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
{
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
+ /* Sharing should always be enabled for non-SRIOV. */
+ if (!amdgpu_sriov_vf(adev))
+ return true;
+
for (i = 0 ; i < top->num_nodes; ++i)
if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
- return top->nodes[i].num_links;
- return -EINVAL;
+ return !!top->nodes[i].is_sharing_enabled;
+
+ return false;
}
/*
@@ -860,8 +992,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported)
return 0;
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
ret = psp_xgmi_initialize(&adev->psp, false, true);
if (ret) {
dev_err(adev->dev,
@@ -907,8 +1038,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
task_barrier_add_task(&hive->tb);
- if (!adev->gmc.xgmi.pending_reset &&
- amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
/* update node list for other device in the hive */
if (tmp_adev != adev) {
@@ -985,7 +1115,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
}
}
- if (!ret && !adev->gmc.xgmi.pending_reset)
+ if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
exit_unlock:
@@ -1066,11 +1196,13 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban
if (ext_error_code != 0 && ext_error_code != 9)
count = 0ULL;
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
break;
case ACA_SMU_TYPE_CE:
count = ext_error_code == 6 ? count : 0ULL;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count);
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
break;
default:
return -EINVAL;
@@ -1105,6 +1237,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL,
&xgmi_v6_4_0_aca_info, NULL);
if (r)
@@ -1164,6 +1297,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++)
pcs_clear_status(adev,
xgmi3x16_pcs_err_status_reg_v6_4[i]);
@@ -1198,6 +1332,7 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
xgmi_v6_4_0_reset_ras_error_count(adev);
break;
default:
@@ -1223,7 +1358,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
IP_VERSION(6, 1, 0) ||
amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
- IP_VERSION(6, 4, 0)) {
+ IP_VERSION(6, 4, 0) ||
+ amdgpu_ip_version(adev, XGMI_HWIP, 0) ==
+ IP_VERSION(6, 4, 1)) {
pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
} else {
@@ -1331,6 +1468,7 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev,
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
/* check xgmi3x16 pcs error */
for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) {
data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]);
@@ -1427,6 +1565,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
{
switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
case IP_VERSION(6, 4, 0):
+ case IP_VERSION(6, 4, 1):
xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status);
break;
default:
@@ -1500,3 +1639,148 @@ int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
+
+static void amdgpu_xgmi_reset_on_init_work(struct work_struct *work)
+{
+ struct amdgpu_hive_info *hive =
+ container_of(work, struct amdgpu_hive_info, reset_on_init_work);
+ struct amdgpu_reset_context reset_context;
+ struct amdgpu_device *tmp_adev;
+ struct list_head device_list;
+ int r;
+
+ mutex_lock(&hive->hive_lock);
+
+ INIT_LIST_HEAD(&device_list);
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ list_add_tail(&tmp_adev->reset_list, &device_list);
+
+ tmp_adev = list_first_entry(&device_list, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
+ reset_context.method = AMD_RESET_METHOD_ON_INIT;
+ reset_context.reset_req_dev = tmp_adev;
+ reset_context.hive = hive;
+ reset_context.reset_device_list = &device_list;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
+
+ amdgpu_reset_do_xgmi_reset_on_init(&reset_context);
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = amdgpu_ras_init_badpage_info(tmp_adev);
+ if (r && r != -EHWPOISON)
+ dev_err(tmp_adev->dev,
+ "error during bad page data initialization");
+ }
+}
+
+static void amdgpu_xgmi_schedule_reset_on_init(struct amdgpu_hive_info *hive)
+{
+ INIT_WORK(&hive->reset_on_init_work, amdgpu_xgmi_reset_on_init_work);
+ amdgpu_reset_domain_schedule(hive->reset_domain,
+ &hive->reset_on_init_work);
+}
+
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_hive_info *hive;
+ bool reset_scheduled;
+ int num_devs;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ return -EINVAL;
+
+ mutex_lock(&hive->hive_lock);
+ num_devs = atomic_read(&hive->number_devices);
+ reset_scheduled = false;
+ if (num_devs == adev->gmc.xgmi.num_physical_nodes) {
+ amdgpu_xgmi_schedule_reset_on_init(hive);
+ reset_scheduled = true;
+ }
+
+ mutex_unlock(&hive->hive_lock);
+ amdgpu_put_xgmi_hive(hive);
+
+ if (reset_scheduled)
+ flush_work(&hive->reset_on_init_work);
+
+ return 0;
+}
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode)
+{
+ struct amdgpu_device *tmp_adev;
+ int cur_nps_mode, r;
+
+ /* This is expected to be called only during unload of driver. The
+ * request needs to be placed only once for all devices in the hive. If
+ * one of them fail, revert the request for previous successful devices.
+ * After placing the request, make hive mode as UNKNOWN so that other
+ * devices don't request anymore.
+ */
+ mutex_lock(&hive->hive_lock);
+ if (atomic_read(&hive->requested_nps_mode) ==
+ UNKNOWN_MEMORY_PARTITION_MODE) {
+ dev_dbg(adev->dev, "Unexpected entry for hive NPS change");
+ mutex_unlock(&hive->hive_lock);
+ return 0;
+ }
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ r = adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, req_nps_mode);
+ if (r)
+ break;
+ }
+ if (r) {
+ /* Request back current mode if one of the requests failed */
+ cur_nps_mode =
+ adev->gmc.gmc_funcs->query_mem_partition_mode(tmp_adev);
+ list_for_each_entry_continue_reverse(
+ tmp_adev, &hive->device_list, gmc.xgmi.head)
+ adev->gmc.gmc_funcs->request_mem_partition_mode(
+ tmp_adev, cur_nps_mode);
+ }
+ /* Set to UNKNOWN so that other devices don't request anymore */
+ atomic_set(&hive->requested_nps_mode, UNKNOWN_MEMORY_PARTITION_MODE);
+ mutex_unlock(&hive->hive_lock);
+
+ return r;
+}
+
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev)
+{
+ return (amdgpu_use_xgmi_p2p && adev != bo_adev &&
+ adev->gmc.xgmi.hive_id &&
+ adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
+}
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev)
+{
+ if (!adev->gmc.xgmi.supported)
+ return;
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 4, 1):
+ case IP_VERSION(9, 4, 2):
+ adev->gmc.xgmi.max_speed = XGMI_SPEED_25GT;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
+ adev->gmc.xgmi.max_speed = XGMI_SPEED_32GT;
+ adev->gmc.xgmi.max_width = 16;
+ break;
+ default:
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index a3bfc16de6d4..f994be985f42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -23,9 +23,14 @@
#define __AMDGPU_XGMI_H__
#include <drm/task_barrier.h>
-#include "amdgpu_psp.h"
#include "amdgpu_ras.h"
+enum amdgpu_xgmi_link_speed {
+ XGMI_SPEED_16GT = 16,
+ XGMI_SPEED_25GT = 25,
+ XGMI_SPEED_32GT = 32
+};
+
struct amdgpu_hive_info {
struct kobject kobj;
uint64_t hive_id;
@@ -45,6 +50,8 @@ struct amdgpu_hive_info {
struct amdgpu_reset_domain *reset_domain;
atomic_t ras_recovery;
struct ras_event_manager event_mgr;
+ struct work_struct reset_on_init_work;
+ atomic_t requested_nps_mode;
};
struct amdgpu_pcs_ras_field {
@@ -53,27 +60,74 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift;
};
-extern struct amdgpu_xgmi_ras xgmi_ras;
+/**
+ * Bandwidth range reporting comes in two modes.
+ *
+ * PER_LINK - range for any xgmi link
+ * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer
+ */
+enum amdgpu_xgmi_bw_mode {
+ AMDGPU_XGMI_BW_MODE_PER_LINK = 0,
+ AMDGPU_XGMI_BW_MODE_PER_PEER
+};
+
+enum amdgpu_xgmi_bw_unit {
+ AMDGPU_XGMI_BW_UNIT_GBYTES = 0,
+ AMDGPU_XGMI_BW_UNIT_MBYTES
+};
+
+struct amdgpu_xgmi_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+extern struct amdgpu_xgmi_ras xgmi_ras;
+
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 node_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+ bool supported;
+ struct ras_common_if *ras_if;
+ bool connected_to_cpu;
+ struct amdgpu_xgmi_ras *ras;
+ enum amdgpu_xgmi_link_speed max_speed;
+ uint8_t max_width;
+};
+
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
-int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
-int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
- struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev,
+ enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit,
+ uint32_t *min_bw, uint32_t *max_bw);
+bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr);
-static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev)
-{
- return (amdgpu_use_xgmi_p2p &&
- adev != bo_adev &&
- adev->gmc.xgmi.hive_id &&
- adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
-}
+bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev);
int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
+
+int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive,
+ int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
+int amdgpu_xgmi_get_ext_link(struct amdgpu_device *adev, int link_num);
+
+void amdgpu_xgmi_early_init(struct amdgpu_device *adev);
+uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 6e9eeaeb3de1..92ca13097aaa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -28,17 +28,21 @@
#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
-
+#define AMD_SRIOV_MSG_TMR_OFFSET_KB 2048
+#define AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB 2
+#define AMD_SRIOV_RAS_TELEMETRY_SIZE_KB 64
/*
* layout
- * 0 64KB 65KB 66KB
- * | VBIOS | PF2VF | VF2PF | Bad Page | ...
- * | 64KB | 1KB | 1KB |
+ * 0 64KB 65KB 66KB 68KB 132KB
+ * | VBIOS | PF2VF | VF2PF | Bad Page | RAS Telemetry Region | ...
+ * | 64KB | 1KB | 1KB | 2KB | 64KB | ...
*/
+
#define AMD_SRIOV_MSG_SIZE_KB 1
#define AMD_SRIOV_MSG_PF2VF_OFFSET_KB AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB
#define AMD_SRIOV_MSG_VF2PF_OFFSET_KB (AMD_SRIOV_MSG_PF2VF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
#define AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB (AMD_SRIOV_MSG_VF2PF_OFFSET_KB + AMD_SRIOV_MSG_SIZE_KB)
+#define AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB (AMD_SRIOV_MSG_BAD_PAGE_OFFSET_KB + AMD_SRIOV_MSG_BAD_PAGE_SIZE_KB)
/*
* PF2VF history log:
@@ -86,30 +90,61 @@ enum amd_sriov_ucode_engine_id {
union amd_sriov_msg_feature_flags {
struct {
- uint32_t error_log_collect : 1;
- uint32_t host_load_ucodes : 1;
- uint32_t host_flr_vramlost : 1;
- uint32_t mm_bw_management : 1;
- uint32_t pp_one_vf_mode : 1;
- uint32_t reg_indirect_acc : 1;
- uint32_t av1_support : 1;
- uint32_t vcn_rb_decouple : 1;
- uint32_t mes_info_enable : 1;
- uint32_t reserved : 23;
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t av1_support : 1;
+ uint32_t vcn_rb_decouple : 1;
+ uint32_t mes_info_dump_enable : 1;
+ uint32_t ras_caps : 1;
+ uint32_t ras_telemetry : 1;
+ uint32_t ras_cper : 1;
+ uint32_t reserved : 20;
} flags;
uint32_t all;
};
union amd_sriov_reg_access_flags {
struct {
- uint32_t vf_reg_access_ih : 1;
- uint32_t vf_reg_access_mmhub : 1;
- uint32_t vf_reg_access_gc : 1;
- uint32_t reserved : 29;
+ uint32_t vf_reg_access_ih : 1;
+ uint32_t vf_reg_access_mmhub : 1;
+ uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_access_l1_tlb_cntl : 1;
+ uint32_t reserved : 28;
} flags;
uint32_t all;
};
+union amd_sriov_ras_caps {
+ struct {
+ uint64_t block_umc : 1;
+ uint64_t block_sdma : 1;
+ uint64_t block_gfx : 1;
+ uint64_t block_mmhub : 1;
+ uint64_t block_athub : 1;
+ uint64_t block_pcie_bif : 1;
+ uint64_t block_hdp : 1;
+ uint64_t block_xgmi_wafl : 1;
+ uint64_t block_df : 1;
+ uint64_t block_smn : 1;
+ uint64_t block_sem : 1;
+ uint64_t block_mp0 : 1;
+ uint64_t block_mp1 : 1;
+ uint64_t block_fuse : 1;
+ uint64_t block_mca : 1;
+ uint64_t block_vcn : 1;
+ uint64_t block_jpeg : 1;
+ uint64_t block_ih : 1;
+ uint64_t block_mpio : 1;
+ uint64_t poison_propogation_mode : 1;
+ uint64_t reserved : 44;
+ } bits;
+ uint64_t all;
+};
+
union amd_sriov_msg_os_info {
struct {
uint32_t windows : 1;
@@ -158,7 +193,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
-#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49)
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (55)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -211,6 +246,12 @@ struct amd_sriov_msg_pf2vf_info {
uint32_t pcie_atomic_ops_support_flags;
/* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */
uint32_t gpu_capacity;
+ /* vf bdf on host pci tree for debug only */
+ uint32_t bdf_on_host;
+ uint32_t more_bp; //Reserved for future use.
+ union amd_sriov_ras_caps ras_en_caps;
+ union amd_sriov_ras_caps ras_telemetry_en_caps;
+
/* reserved */
uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
} __packed;
@@ -283,22 +324,93 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_REL_GPU_FINI_ACCESS,
MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
MB_REQ_MSG_REQ_GPU_INIT_DATA,
+ MB_REQ_MSG_PSP_VF_CMD_RELAY,
MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_READY_TO_RESET = 201,
+ MB_REQ_MSG_RAS_POISON = 202,
+ MB_REQ_RAS_ERROR_COUNT = 203,
+ MB_REQ_RAS_CPER_DUMP = 204,
+ MB_REQ_RAS_BAD_PAGES = 205,
};
/* mailbox message send from host to guest */
enum amd_sriov_mailbox_response_message {
- MB_RES_MSG_CLR_MSG_BUF = 0,
- MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
- MB_RES_MSG_FLR_NOTIFICATION,
- MB_RES_MSG_FLR_NOTIFICATION_COMPLETION,
- MB_RES_MSG_SUCCESS,
- MB_RES_MSG_FAIL,
- MB_RES_MSG_QUERY_ALIVE,
- MB_RES_MSG_GPU_INIT_DATA_READY,
-
- MB_RES_MSG_TEXT_MESSAGE = 255
+ MB_RES_MSG_CLR_MSG_BUF = 0,
+ MB_RES_MSG_READY_TO_ACCESS_GPU = 1,
+ MB_RES_MSG_FLR_NOTIFICATION = 2,
+ MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3,
+ MB_RES_MSG_SUCCESS = 4,
+ MB_RES_MSG_FAIL = 5,
+ MB_RES_MSG_QUERY_ALIVE = 6,
+ MB_RES_MSG_GPU_INIT_DATA_READY = 7,
+ MB_RES_MSG_RAS_POISON_READY = 8,
+ MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9,
+ MB_RES_MSG_GPU_RMA = 10,
+ MB_RES_MSG_RAS_ERROR_COUNT_READY = 11,
+ MB_REQ_RAS_CPER_DUMP_READY = 14,
+ MB_RES_MSG_RAS_BAD_PAGES_READY = 15,
+ MB_RES_MSG_RAS_BAD_PAGES_NOTIFICATION = 16,
+ MB_RES_MSG_UNRECOV_ERR_NOTIFICATION = 17,
+ MB_RES_MSG_TEXT_MESSAGE = 255
+};
+
+enum amd_sriov_ras_telemetry_gpu_block {
+ RAS_TELEMETRY_GPU_BLOCK_UMC = 0,
+ RAS_TELEMETRY_GPU_BLOCK_SDMA = 1,
+ RAS_TELEMETRY_GPU_BLOCK_GFX = 2,
+ RAS_TELEMETRY_GPU_BLOCK_MMHUB = 3,
+ RAS_TELEMETRY_GPU_BLOCK_ATHUB = 4,
+ RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF = 5,
+ RAS_TELEMETRY_GPU_BLOCK_HDP = 6,
+ RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL = 7,
+ RAS_TELEMETRY_GPU_BLOCK_DF = 8,
+ RAS_TELEMETRY_GPU_BLOCK_SMN = 9,
+ RAS_TELEMETRY_GPU_BLOCK_SEM = 10,
+ RAS_TELEMETRY_GPU_BLOCK_MP0 = 11,
+ RAS_TELEMETRY_GPU_BLOCK_MP1 = 12,
+ RAS_TELEMETRY_GPU_BLOCK_FUSE = 13,
+ RAS_TELEMETRY_GPU_BLOCK_MCA = 14,
+ RAS_TELEMETRY_GPU_BLOCK_VCN = 15,
+ RAS_TELEMETRY_GPU_BLOCK_JPEG = 16,
+ RAS_TELEMETRY_GPU_BLOCK_IH = 17,
+ RAS_TELEMETRY_GPU_BLOCK_MPIO = 18,
+ RAS_TELEMETRY_GPU_BLOCK_COUNT = 19,
+};
+
+struct amd_sriov_ras_telemetry_header {
+ uint32_t checksum;
+ uint32_t used_size;
+ uint32_t reserved[2];
+};
+
+struct amd_sriov_ras_telemetry_error_count {
+ struct {
+ uint32_t ce_count;
+ uint32_t ue_count;
+ uint32_t de_count;
+ uint32_t ce_overflow_count;
+ uint32_t ue_overflow_count;
+ uint32_t de_overflow_count;
+ uint32_t reserved[6];
+ } block[RAS_TELEMETRY_GPU_BLOCK_COUNT];
+};
+
+struct amd_sriov_ras_cper_dump {
+ uint32_t more;
+ uint64_t overflow_count;
+ uint64_t count;
+ uint64_t wptr;
+ uint32_t buf[];
+};
+
+struct amdsriov_ras_telemetry {
+ struct amd_sriov_ras_telemetry_header header;
+
+ union {
+ struct amd_sriov_ras_telemetry_error_count error_count;
+ struct amd_sriov_ras_cper_dump cper_dump;
+ } body;
};
/* version data stored in MAILBOX_MSGBUF_RCV_DW1 for future expansion */
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 5e8833e4fed2..1c083304ae77 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -77,7 +77,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id;
- if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
+ if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_CPER))
return;
inst_mask = 1 << inst_idx;
@@ -447,6 +448,91 @@ static int __aqua_vanjaram_get_xcp_ip_info(struct amdgpu_xcp_mgr *xcp_mgr, int x
return 0;
}
+static int __aqua_vanjaram_get_px_mode_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int px_mode, int *num_xcp,
+ uint16_t *nps_modes)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ if (!num_xcp || !nps_modes || !(xcp_mgr->supp_xcp_modes & BIT(px_mode)))
+ return -EINVAL;
+
+ switch (px_mode) {
+ case AMDGPU_SPX_PARTITION_MODE:
+ *num_xcp = 1;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE);
+ break;
+ case AMDGPU_DPX_PARTITION_MODE:
+ *num_xcp = 2;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ case AMDGPU_TPX_PARTITION_MODE:
+ *num_xcp = 3;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ case AMDGPU_QPX_PARTITION_MODE:
+ *num_xcp = 4;
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ case AMDGPU_CPX_PARTITION_MODE:
+ *num_xcp = NUM_XCC(adev->gfx.xcc_mask);
+ *nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ if (amdgpu_sriov_vf(adev))
+ *nps_modes |= BIT(AMDGPU_NPS2_PARTITION_MODE);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr,
+ int mode,
+ struct amdgpu_xcp_cfg *xcp_cfg)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+ int max_res[AMDGPU_XCP_RES_MAX] = {};
+ bool res_lt_xcp;
+ int num_xcp, i, r;
+ u16 nps_modes;
+
+ if (!(xcp_mgr->supp_xcp_modes & BIT(mode)))
+ return -EINVAL;
+
+ max_res[AMDGPU_XCP_RES_XCC] = NUM_XCC(adev->gfx.xcc_mask);
+ max_res[AMDGPU_XCP_RES_DMA] = adev->sdma.num_instances;
+ max_res[AMDGPU_XCP_RES_DEC] = adev->vcn.num_vcn_inst;
+ max_res[AMDGPU_XCP_RES_JPEG] = adev->jpeg.num_jpeg_inst;
+
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp, &nps_modes);
+ if (r)
+ return r;
+
+ xcp_cfg->compatible_nps_modes =
+ (adev->gmc.supported_nps_modes & nps_modes);
+ xcp_cfg->num_res = ARRAY_SIZE(max_res);
+
+ for (i = 0; i < xcp_cfg->num_res; i++) {
+ res_lt_xcp = max_res[i] < num_xcp;
+ xcp_cfg->xcp_res[i].id = i;
+ xcp_cfg->xcp_res[i].num_inst =
+ res_lt_xcp ? 1 : max_res[i] / num_xcp;
+ xcp_cfg->xcp_res[i].num_inst =
+ i == AMDGPU_XCP_RES_JPEG ?
+ xcp_cfg->xcp_res[i].num_inst *
+ adev->jpeg.num_jpeg_rings : xcp_cfg->xcp_res[i].num_inst;
+ xcp_cfg->xcp_res[i].num_shared =
+ res_lt_xcp ? num_xcp / max_res[i] : 1;
+ }
+
+ return 0;
+}
+
static enum amdgpu_gfx_partition
__aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
{
@@ -475,27 +561,31 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr,
enum amdgpu_gfx_partition mode)
{
struct amdgpu_device *adev = xcp_mgr->adev;
- int num_xcc, num_xccs_per_xcp;
+ int num_xcc, num_xccs_per_xcp, r;
+ int num_xcp, nps_mode;
+ u16 supp_nps_modes;
+ bool comp_mode;
+
+ nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+ r = __aqua_vanjaram_get_px_mode_info(xcp_mgr, mode, &num_xcp,
+ &supp_nps_modes);
+ if (r)
+ return false;
+ comp_mode = !!(BIT(nps_mode) & supp_nps_modes);
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (mode) {
case AMDGPU_SPX_PARTITION_MODE:
- return adev->gmc.num_mem_partitions == 1 && num_xcc > 0;
+ return comp_mode && num_xcc > 0;
case AMDGPU_DPX_PARTITION_MODE:
- return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0;
+ return comp_mode && (num_xcc % 4) == 0;
case AMDGPU_TPX_PARTITION_MODE:
- return (adev->gmc.num_mem_partitions == 1 ||
- adev->gmc.num_mem_partitions == 3) &&
- ((num_xcc % 3) == 0);
+ return comp_mode && ((num_xcc % 3) == 0);
case AMDGPU_QPX_PARTITION_MODE:
num_xccs_per_xcp = num_xcc / 4;
- return (adev->gmc.num_mem_partitions == 1 ||
- adev->gmc.num_mem_partitions == 4) &&
- (num_xccs_per_xcp >= 2);
+ return comp_mode && (num_xccs_per_xcp >= 2);
case AMDGPU_CPX_PARTITION_MODE:
- return ((num_xcc > 1) &&
- (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) &&
- (num_xcc % adev->gmc.num_mem_partitions) == 0);
+ return comp_mode && (num_xcc > 1);
default:
return false;
}
@@ -530,6 +620,57 @@ static int __aqua_vanjaram_post_partition_switch(struct amdgpu_xcp_mgr *xcp_mgr,
return ret;
}
+static void
+__aqua_vanjaram_update_supported_modes(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ struct amdgpu_device *adev = xcp_mgr->adev;
+
+ xcp_mgr->supp_xcp_modes = 0;
+
+ switch (NUM_XCC(adev->gfx.xcc_mask)) {
+ case 8:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_QPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 6:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_TPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 4:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_DPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ /* this seems only existing in emulation phase */
+ case 2:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+ case 1:
+ xcp_mgr->supp_xcp_modes = BIT(AMDGPU_SPX_PARTITION_MODE) |
+ BIT(AMDGPU_CPX_PARTITION_MODE);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void __aqua_vanjaram_update_available_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr)
+{
+ int mode;
+
+ xcp_mgr->avail_xcp_modes = 0;
+
+ for_each_inst(mode, xcp_mgr->supp_xcp_modes) {
+ if (__aqua_vanjaram_is_valid_mode(xcp_mgr, mode))
+ xcp_mgr->avail_xcp_modes |= BIT(mode);
+ }
+}
+
static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
int mode, int *num_xcps)
{
@@ -578,6 +719,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
amdgpu_xcp_init(xcp_mgr, *num_xcps, mode);
ret = __aqua_vanjaram_post_partition_switch(xcp_mgr, flags);
+ if (!ret)
+ __aqua_vanjaram_update_available_partition_mode(xcp_mgr);
unlock:
if (flags & AMDGPU_XCP_OPS_KFD)
amdgpu_amdkfd_unlock_kfd(adev);
@@ -656,9 +799,11 @@ struct amdgpu_xcp_mgr_funcs aqua_vanjaram_xcp_funcs = {
.switch_partition_mode = &aqua_vanjaram_switch_partition_mode,
.query_partition_mode = &aqua_vanjaram_query_partition_mode,
.get_ip_details = &aqua_vanjaram_get_xcp_ip_details,
+ .get_xcp_res_info = &aqua_vanjaram_get_xcp_res_info,
.get_xcp_mem_id = &aqua_vanjaram_get_xcp_mem_id,
.select_scheds = &aqua_vanjaram_select_scheds,
- .update_partition_sched_list = &aqua_vanjaram_update_partition_sched_list
+ .update_partition_sched_list =
+ &aqua_vanjaram_update_partition_sched_list
};
static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
@@ -673,6 +818,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
if (ret)
return ret;
+ __aqua_vanjaram_update_supported_modes(adev->xcp_mgr);
/* TODO: Default memory node affinity init */
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 81d195d366ce..427b073de2fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1444,6 +1444,7 @@ static void atom_get_vbios_pn(struct atom_context *ctx)
if (vbios_str == NULL)
vbios_str += sizeof(BIOS_ATOM_PREFIX) - 1;
}
+ OPTIMIZER_HIDE_VAR(vbios_str);
if (vbios_str != NULL && *vbios_str == 0)
vbios_str++;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 622634c08c7b..492813ab1b54 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -430,7 +430,7 @@ void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
}
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode)
+ const struct drm_display_mode *mode)
{
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct amdgpu_connector_atom_dig *dig_connector;
@@ -458,8 +458,8 @@ bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connect
u8 link_status[DP_LINK_STATUS_SIZE];
struct amdgpu_connector_atom_dig *dig = amdgpu_connector->con_priv;
- if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux, link_status)
- <= 0)
+ if (drm_dp_dpcd_read_link_status(&amdgpu_connector->ddc_bus->aux,
+ link_status) < 0)
return false;
if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
return false;
@@ -616,7 +616,7 @@ amdgpu_atombios_dp_link_train_cr(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_clock_recovery_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
@@ -681,7 +681,7 @@ amdgpu_atombios_dp_link_train_ce(struct amdgpu_atombios_dp_link_train_info *dp_i
drm_dp_link_train_channel_eq_delay(dp_info->aux, dp_info->dpcd);
if (drm_dp_dpcd_read_link_status(dp_info->aux,
- dp_info->link_status) <= 0) {
+ dp_info->link_status) < 0) {
DRM_ERROR("displayport link status failed\n");
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
index f59d85eaddf0..3e24acf8133f 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h
@@ -32,7 +32,7 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder,
void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector,
const struct drm_display_mode *mode);
int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector,
- struct drm_display_mode *mode);
+ const struct drm_display_mode *mode);
bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector);
void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector,
u8 power_state);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index cf1d5d462b67..9cd63b4177bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1985,9 +1985,9 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.query_video_codecs = &cik_query_video_codecs,
};
-static int cik_common_early_init(void *handle)
+static int cik_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &cik_smc_rreg;
adev->smc_wreg = &cik_smc_wreg;
@@ -2124,19 +2124,9 @@ static int cik_common_early_init(void *handle)
return 0;
}
-static int cik_common_sw_init(void *handle)
+static int cik_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int cik_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int cik_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
cik_init_golden_registers(adev);
@@ -2148,48 +2138,36 @@ static int cik_common_hw_init(void *handle)
return 0;
}
-static int cik_common_hw_fini(void *handle)
+static int cik_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int cik_common_suspend(void *handle)
+static int cik_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_fini(adev);
+ return cik_common_hw_init(ip_block);
}
-static int cik_common_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_common_hw_init(adev);
-}
-
-static bool cik_common_is_idle(void *handle)
+static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int cik_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-static int cik_common_soft_reset(void *handle)
+
+static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX hard reset?? */
return 0;
}
-static int cik_common_set_clockgating_state(void *handle,
+static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_common_set_powergating_state(void *handle,
+static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2198,20 +2176,13 @@ static int cik_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_common_ip_funcs = {
.name = "cik_common",
.early_init = cik_common_early_init,
- .late_init = NULL,
- .sw_init = cik_common_sw_init,
- .sw_fini = cik_common_sw_fini,
.hw_init = cik_common_hw_init,
.hw_fini = cik_common_hw_fini,
- .suspend = cik_common_suspend,
.resume = cik_common_resume,
.is_idle = cik_common_is_idle,
- .wait_for_idle = cik_common_wait_for_idle,
.soft_reset = cik_common_soft_reset,
.set_clockgating_state = cik_common_set_clockgating_state,
.set_powergating_state = cik_common_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version cik_common_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 576baa9dbb0e..41f4705bdbbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -283,9 +283,9 @@ static void cik_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cik_ih_early_init(void *handle)
+static int cik_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -297,10 +297,10 @@ static int cik_ih_early_init(void *handle)
return 0;
}
-static int cik_ih_sw_init(void *handle)
+static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -311,9 +311,9 @@ static int cik_ih_sw_init(void *handle)
return r;
}
-static int cik_ih_sw_fini(void *handle)
+static int cik_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -321,39 +321,33 @@ static int cik_ih_sw_fini(void *handle)
return 0;
}
-static int cik_ih_hw_init(void *handle)
+static int cik_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return cik_ih_irq_init(adev);
}
-static int cik_ih_hw_fini(void *handle)
+static int cik_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cik_ih_irq_disable(adev);
+ cik_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cik_ih_suspend(void *handle)
+static int cik_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_fini(adev);
+ return cik_ih_hw_fini(ip_block);
}
-static int cik_ih_resume(void *handle)
+static int cik_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_ih_hw_init(adev);
+ return cik_ih_hw_init(ip_block);
}
-static bool cik_ih_is_idle(void *handle)
+static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -362,11 +356,11 @@ static bool cik_ih_is_idle(void *handle)
return true;
}
-static int cik_ih_wait_for_idle(void *handle)
+static int cik_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -378,9 +372,9 @@ static int cik_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cik_ih_soft_reset(void *handle)
+static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -408,13 +402,13 @@ static int cik_ih_soft_reset(void *handle)
return 0;
}
-static int cik_ih_set_clockgating_state(void *handle,
+static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int cik_ih_set_powergating_state(void *handle,
+static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -423,7 +417,6 @@ static int cik_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_ih_ip_funcs = {
.name = "cik_ih",
.early_init = cik_ih_early_init,
- .late_init = NULL,
.sw_init = cik_ih_sw_init,
.sw_fini = cik_ih_sw_fini,
.hw_init = cik_ih_hw_init,
@@ -435,8 +428,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
.soft_reset = cik_ih_soft_reset,
.set_clockgating_state = cik_ih_set_clockgating_state,
.set_powergating_state = cik_ih_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs cik_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 952737de9411..9e8715b4739d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,7 +54,9 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
-static int cik_sdma_soft_reset(void *handle);
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block);
+
+u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
@@ -67,9 +69,6 @@ MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
-
-
static void cik_sdma_free_microcode(struct amdgpu_device *adev)
{
int i;
@@ -133,9 +132,11 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -696,7 +697,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -918,9 +919,9 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
}
}
-static int cik_sdma_early_init(void *handle)
+static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
@@ -937,10 +938,10 @@ static int cik_sdma_early_init(void *handle)
return 0;
}
-static int cik_sdma_sw_init(void *handle)
+static int cik_sdma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, i;
/* SDMA trap event */
@@ -977,9 +978,9 @@ static int cik_sdma_sw_init(void *handle)
return r;
}
-static int cik_sdma_sw_fini(void *handle)
+static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -989,21 +990,16 @@ static int cik_sdma_sw_fini(void *handle)
return 0;
}
-static int cik_sdma_hw_init(void *handle)
+static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = cik_sdma_start(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
- return r;
+ return cik_sdma_start(adev);
}
-static int cik_sdma_hw_fini(void *handle)
+static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cik_ctx_switch_enable(adev, false);
cik_sdma_enable(adev, false);
@@ -1011,25 +1007,21 @@ static int cik_sdma_hw_fini(void *handle)
return 0;
}
-static int cik_sdma_suspend(void *handle)
+static int cik_sdma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cik_sdma_hw_fini(adev);
+ return cik_sdma_hw_fini(ip_block);
}
-static int cik_sdma_resume(void *handle)
+static int cik_sdma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(ip_block);
- cik_sdma_soft_reset(handle);
-
- return cik_sdma_hw_init(adev);
+ return cik_sdma_hw_init(ip_block);
}
-static bool cik_sdma_is_idle(void *handle)
+static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1039,27 +1031,23 @@ static bool cik_sdma_is_idle(void *handle)
return true;
}
-static int cik_sdma_wait_for_idle(void *handle)
+static int cik_sdma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
- SRBM_STATUS2__SDMA1_BUSY_MASK);
-
- if (!tmp)
+ if (cik_sdma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int cik_sdma_soft_reset(void *handle)
+static int cik_sdma_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
/* sdma0 */
@@ -1193,11 +1181,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int cik_sdma_set_clockgating_state(void *handle,
+static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1208,7 +1196,7 @@ static int cik_sdma_set_clockgating_state(void *handle,
return 0;
}
-static int cik_sdma_set_powergating_state(void *handle,
+static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1217,7 +1205,6 @@ static int cik_sdma_set_powergating_state(void *handle,
static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.name = "cik_sdma",
.early_init = cik_sdma_early_init,
- .late_init = NULL,
.sw_init = cik_sdma_sw_init,
.sw_fini = cik_sdma_sw_fini,
.hw_init = cik_sdma_hw_init,
@@ -1229,8 +1216,6 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.soft_reset = cik_sdma_soft_reset,
.set_clockgating_state = cik_sdma_set_clockgating_state,
.set_powergating_state = cik_sdma_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 06088d52d81c..8aca4f2734f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -51,8 +51,14 @@
#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
-#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
-#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define PIPEID(x) ((x) << 0)
#define MEID(x) ((x) << 2)
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 072643787384..2f891fb846d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -274,9 +274,9 @@ static void cz_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int cz_ih_early_init(void *handle)
+static int cz_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -288,10 +288,10 @@ static int cz_ih_early_init(void *handle)
return 0;
}
-static int cz_ih_sw_init(void *handle)
+static int cz_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -302,9 +302,9 @@ static int cz_ih_sw_init(void *handle)
return r;
}
-static int cz_ih_sw_fini(void *handle)
+static int cz_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -312,10 +312,10 @@ static int cz_ih_sw_fini(void *handle)
return 0;
}
-static int cz_ih_hw_init(void *handle)
+static int cz_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = cz_ih_irq_init(adev);
if (r)
@@ -324,32 +324,26 @@ static int cz_ih_hw_init(void *handle)
return 0;
}
-static int cz_ih_hw_fini(void *handle)
+static int cz_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cz_ih_irq_disable(adev);
+ cz_ih_irq_disable(ip_block->adev);
return 0;
}
-static int cz_ih_suspend(void *handle)
+static int cz_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_fini(adev);
+ return cz_ih_hw_fini(ip_block);
}
-static int cz_ih_resume(void *handle)
+static int cz_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return cz_ih_hw_init(adev);
+ return cz_ih_hw_init(ip_block);
}
-static bool cz_ih_is_idle(void *handle)
+static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -358,11 +352,11 @@ static bool cz_ih_is_idle(void *handle)
return true;
}
-static int cz_ih_wait_for_idle(void *handle)
+static int cz_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -374,10 +368,10 @@ static int cz_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int cz_ih_soft_reset(void *handle)
+static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -404,14 +398,14 @@ static int cz_ih_soft_reset(void *handle)
return 0;
}
-static int cz_ih_set_clockgating_state(void *handle,
+static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
// TODO
return 0;
}
-static int cz_ih_set_powergating_state(void *handle,
+static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
// TODO
@@ -421,7 +415,6 @@ static int cz_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs cz_ih_ip_funcs = {
.name = "cz_ih",
.early_init = cz_ih_early_init,
- .late_init = NULL,
.sw_init = cz_ih_sw_init,
.sw_fini = cz_ih_sw_fini,
.hw_init = cz_ih_hw_init,
@@ -433,8 +426,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
.soft_reset = cz_ih_soft_reset,
.set_clockgating_state = cz_ih_set_clockgating_state,
.set_powergating_state = cz_ih_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs cz_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 70c1399f738d..bf7c22f81cda 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2687,6 +2687,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v10_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v10_0_panic_flush,
+};
+
static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2734,13 +2760,14 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v10_0_early_init(void *handle)
+static int dce_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v10_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg;
@@ -2765,10 +2792,10 @@ static int dce_v10_0_early_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_init(void *handle)
+static int dce_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2844,9 +2871,9 @@ static int dce_v10_0_sw_init(void *handle)
return 0;
}
-static int dce_v10_0_sw_fini(void *handle)
+static int dce_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
drm_edid_free(adev->mode_info.bios_hardcoded_edid);
@@ -2862,10 +2889,10 @@ static int dce_v10_0_sw_fini(void *handle)
return 0;
}
-static int dce_v10_0_hw_init(void *handle)
+static int dce_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_init_golden_registers(adev);
@@ -2887,10 +2914,10 @@ static int dce_v10_0_hw_init(void *handle)
return 0;
}
-static int dce_v10_0_hw_fini(void *handle)
+static int dce_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v10_0_hpd_fini(adev);
@@ -2905,9 +2932,9 @@ static int dce_v10_0_hw_fini(void *handle)
return 0;
}
-static int dce_v10_0_suspend(void *handle)
+static int dce_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2917,18 +2944,18 @@ static int dce_v10_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v10_0_hw_fini(handle);
+ return dce_v10_0_hw_fini(ip_block);
}
-static int dce_v10_0_resume(void *handle)
+static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v10_0_hw_init(handle);
+ ret = dce_v10_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2943,27 +2970,22 @@ static int dce_v10_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v10_0_is_idle(void *handle)
+static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v10_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static bool dce_v10_0_check_soft_reset(void *handle)
+static bool dce_v10_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return dce_v10_0_is_display_hung(adev);
}
-static int dce_v10_0_soft_reset(void *handle)
+static int dce_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v10_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3053,7 +3075,7 @@ static int dce_v10_0_set_hpd_irq_state(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
@@ -3205,7 +3227,7 @@ static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -3307,13 +3329,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v10_0_set_clockgating_state(void *handle,
+static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v10_0_set_powergating_state(void *handle,
+static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3322,7 +3344,6 @@ static int dce_v10_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.name = "dce_v10_0",
.early_init = dce_v10_0_early_init,
- .late_init = NULL,
.sw_init = dce_v10_0_sw_init,
.sw_fini = dce_v10_0_sw_fini,
.hw_init = dce_v10_0_hw_init,
@@ -3330,13 +3351,10 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.suspend = dce_v10_0_suspend,
.resume = dce_v10_0_resume,
.is_idle = dce_v10_0_is_idle,
- .wait_for_idle = dce_v10_0_wait_for_idle,
.check_soft_reset = dce_v10_0_check_soft_reset,
.soft_reset = dce_v10_0_soft_reset,
.set_clockgating_state = dce_v10_0_set_clockgating_state,
.set_powergating_state = dce_v10_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index f154c24499c8..47e05783c4a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2800,6 +2800,32 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v11_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v11_0_panic_flush,
+};
+
static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2847,13 +2873,14 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v11_0_early_init(void *handle)
+static int dce_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v11_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg;
@@ -2891,10 +2918,10 @@ static int dce_v11_0_early_init(void *handle)
return 0;
}
-static int dce_v11_0_sw_init(void *handle)
+static int dce_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2971,9 +2998,9 @@ static int dce_v11_0_sw_init(void *handle)
return 0;
}
-static int dce_v11_0_sw_fini(void *handle)
+static int dce_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
drm_edid_free(adev->mode_info.bios_hardcoded_edid);
@@ -2989,10 +3016,10 @@ static int dce_v11_0_sw_fini(void *handle)
return 0;
}
-static int dce_v11_0_hw_init(void *handle)
+static int dce_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v11_0_init_golden_registers(adev);
@@ -3025,10 +3052,10 @@ static int dce_v11_0_hw_init(void *handle)
return 0;
}
-static int dce_v11_0_hw_fini(void *handle)
+static int dce_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v11_0_hpd_fini(adev);
@@ -3043,9 +3070,9 @@ static int dce_v11_0_hw_fini(void *handle)
return 0;
}
-static int dce_v11_0_suspend(void *handle)
+static int dce_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -3055,18 +3082,18 @@ static int dce_v11_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v11_0_hw_fini(handle);
+ return dce_v11_0_hw_fini(ip_block);
}
-static int dce_v11_0_resume(void *handle)
+static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v11_0_hw_init(handle);
+ ret = dce_v11_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -3081,20 +3108,15 @@ static int dce_v11_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v11_0_is_idle(void *handle)
+static bool dce_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v11_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v11_0_soft_reset(void *handle)
+static int dce_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v11_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3184,7 +3206,7 @@ static int dce_v11_0_set_hpd_irq_state(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
@@ -3336,7 +3358,7 @@ static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -3439,13 +3461,13 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v11_0_set_clockgating_state(void *handle,
+static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v11_0_set_powergating_state(void *handle,
+static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3454,7 +3476,6 @@ static int dce_v11_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
.name = "dce_v11_0",
.early_init = dce_v11_0_early_init,
- .late_init = NULL,
.sw_init = dce_v11_0_sw_init,
.sw_fini = dce_v11_0_sw_fini,
.hw_init = dce_v11_0_hw_init,
@@ -3462,16 +3483,12 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
.suspend = dce_v11_0_suspend,
.resume = dce_v11_0_resume,
.is_idle = dce_v11_0_is_idle,
- .wait_for_idle = dce_v11_0_wait_for_idle,
.soft_reset = dce_v11_0_soft_reset,
.set_clockgating_state = dce_v11_0_set_clockgating_state,
.set_powergating_state = dce_v11_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
-static void
-dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
+static void dce_v11_0_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index a7fcb135827f..276c025c4c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -40,18 +40,25 @@
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "dce_v6_0.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
-#include "dce_v6_0.h"
+
#include "si_enums.h"
static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev);
@@ -59,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static const u32 crtc_offsets[6] =
{
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET
};
static const u32 hpd_offsets[] =
{
- mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS,
- mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS,
+ HPD0_REGISTER_OFFSET,
+ HPD1_REGISTER_OFFSET,
+ HPD2_REGISTER_OFFSET,
+ HPD3_REGISTER_OFFSET,
+ HPD4_REGISTER_OFFSET,
+ HPD5_REGISTER_OFFSET
};
static const uint32_t dig_offsets[] = {
- SI_CRTC0_REGISTER_OFFSET,
- SI_CRTC1_REGISTER_OFFSET,
- SI_CRTC2_REGISTER_OFFSET,
- SI_CRTC3_REGISTER_OFFSET,
- SI_CRTC4_REGISTER_OFFSET,
- SI_CRTC5_REGISTER_OFFSET,
+ CRTC0_REGISTER_OFFSET,
+ CRTC1_REGISTER_OFFSET,
+ CRTC2_REGISTER_OFFSET,
+ CRTC3_REGISTER_OFFSET,
+ CRTC4_REGISTER_OFFSET,
+ CRTC5_REGISTER_OFFSET,
(0x13830 - 0x7030) >> 2,
};
@@ -206,9 +213,9 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,
/* update the scanout addresses */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
upper_32_bits(crtc_base));
+ /* writing to the low address triggers the update */
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)crtc_base);
-
/* post the write */
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
}
@@ -218,11 +225,11 @@ static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
{
if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc))
return -EINVAL;
+
*vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]);
*position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
return 0;
-
}
/**
@@ -242,7 +249,8 @@ static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev,
if (hpd >= adev->mode_info.num_hpd)
return connected;
- if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
+ if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) &
+ DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK)
connected = true;
return connected;
@@ -279,7 +287,7 @@ static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -370,13 +378,41 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev)
return mmDC_GPIO_HPD_A;
}
+static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev)
+{
+ u32 crtc_hung = 0;
+ u32 crtc_status[6];
+ u32 i, j, tmp;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) {
+ crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ crtc_hung |= (1 << i);
+ }
+ }
+
+ for (j = 0; j < 10; j++) {
+ for (i = 0; i < adev->mode_info.num_crtc; i++) {
+ if (crtc_hung & (1 << i)) {
+ tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]);
+ if (tmp != crtc_status[i])
+ crtc_hung &= ~(1 << i);
+ }
+ }
+ if (crtc_hung == 0)
+ return false;
+ udelay(100);
+ }
+
+ return true;
+}
+
static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev,
bool render)
{
if (!render)
WREG32(mmVGA_RENDER_CONTROL,
- RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL);
-
+ RREG32(mmVGA_RENDER_CONTROL) & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK);
}
static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev)
@@ -419,7 +455,6 @@ void dce_v6_0_disable_dce(struct amdgpu_device *adev)
static void dce_v6_0_program_fmt(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -895,8 +930,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
wm_high.dram_channels = dram_channels;
wm_high.num_heads = num_heads;
- if (adev->pm.dpm_enabled) {
/* watermark for low clocks */
+ if (adev->pm.dpm_enabled) {
wm_low.yclk =
amdgpu_dpm_get_mclk(adev, true) * 10;
wm_low.sclk =
@@ -976,16 +1011,16 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
/* select wm A */
arb_control3 = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
tmp = arb_control3;
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(1);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (1 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_a << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
(line_time << DPG_PIPE_URGENCY_CONTROL__URGENCY_HIGH_WATERMARK__SHIFT)));
/* select wm B */
tmp = RREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset);
- tmp &= ~LATENCY_WATERMARK_MASK(3);
- tmp |= LATENCY_WATERMARK_MASK(2);
+ tmp &= ~(3 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
+ tmp |= (2 << DPG_PIPE_ARBITRATION_CONTROL3__URGENCY_WATERMARK_MASK__SHIFT);
WREG32(mmDPG_PIPE_ARBITRATION_CONTROL3 + amdgpu_crtc->crtc_offset, tmp);
WREG32(mmDPG_PIPE_URGENCY_CONTROL + amdgpu_crtc->crtc_offset,
((latency_watermark_b << DPG_PIPE_URGENCY_CONTROL__URGENCY_LOW_WATERMARK__SHIFT) |
@@ -1006,6 +1041,20 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
}
/* watermark setup */
+/**
+ * dce_v6_0_line_buffer_adjust - Set up the line buffer
+ *
+ * @adev: amdgpu_device pointer
+ * @amdgpu_crtc: the selected display controller
+ * @mode: the current display mode on the selected display
+ * controller
+ * @other_mode: the display mode of another display controller
+ * that may be sharing the line buffer
+ *
+ * Setup up the line buffer allocation for
+ * the selected display controller (CIK).
+ * Returns the line buffer size in pixels.
+ */
static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
struct amdgpu_crtc *amdgpu_crtc,
struct drm_display_mode *mode,
@@ -1040,7 +1089,7 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,
}
WREG32(mmDC_LB_MEMORY_SPLIT + amdgpu_crtc->crtc_offset,
- DC_LB_MEMORY_CONFIG(tmp));
+ (tmp << DC_LB_MEMORY_SPLIT__DC_LB_MEMORY_CONFIG__SHIFT));
WREG32(mmPIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
(buffer_alloc << PIPE0_DMIF_BUFFER_CONTROL__DMIF_BUFFERS_ALLOCATED__SHIFT));
@@ -1257,6 +1306,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
+ u32 offset;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct amdgpu_connector *amdgpu_connector = NULL;
@@ -1278,6 +1328,11 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
{ ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
};
+ if (!dig || !dig->afmt || !dig->afmt->pin)
+ return;
+
+ offset = dig->afmt->pin->offset;
+
drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
if (connector->encoder == encoder) {
@@ -1299,7 +1354,7 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
return;
for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
- u32 tmp = 0;
+ u32 value = 0;
u8 stereo_freqs = 0;
int max_channels = -1;
int j;
@@ -1309,12 +1364,12 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
if (sad->format == eld_reg_to_type[i][1]) {
if (sad->channels > max_channels) {
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- MAX_CHANNELS, sad->channels);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- DESCRIPTOR_BYTE_2, sad->byte2);
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES, sad->freq);
+ value = (sad->channels <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) |
+ (sad->byte2 <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) |
+ (sad->freq <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT);
max_channels = sad->channels;
}
@@ -1325,13 +1380,13 @@ static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder)
}
}
- tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0,
- SUPPORTED_FREQUENCIES_STEREO, stereo_freqs);
- WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp);
+ value |= (stereo_freqs <<
+ AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES_STEREO__SHIFT);
+
+ WREG32_AUDIO_ENDPT(offset, eld_reg_to_type[i][0], value);
}
kfree(sads);
-
}
static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
@@ -1347,13 +1402,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev,
static const u32 pin_offsets[7] =
{
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v6_0_audio_init(struct amdgpu_device *adev)
@@ -1386,6 +1441,8 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
adev->mode_info.audio.pin[i].connected = false;
adev->mode_info.audio.pin[i].offset = pin_offsets[i];
adev->mode_info.audio.pin[i].id = i;
+ /* disable audio. it will be set up later */
+ /* XXX remove once we switch to ip funcs */
dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
}
@@ -1835,7 +1892,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
struct amdgpu_bo *abo;
uint64_t fb_location, tiling_flags;
uint32_t fb_format, fb_pitch_pixels, pipe_config;
- u32 fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_NONE);
+ u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
u32 viewport_w, viewport_h;
int r;
bool bypass_lut = false;
@@ -1875,76 +1932,76 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
switch (target_fb->format->format) {
case DRM_FORMAT_C8:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_8BPP) |
- GRPH_FORMAT(GRPH_FORMAT_INDEXED));
+ fb_format = ((GRPH_DEPTH_8BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_INDEXED << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
break;
case DRM_FORMAT_XRGB4444:
case DRM_FORMAT_ARGB4444:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB4444));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB4444 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB1555:
case DRM_FORMAT_ARGB1555:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB1555));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB1555 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_BGRX5551:
case DRM_FORMAT_BGRA5551:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA5551));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA5551 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_RGB565:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_16BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB565));
+ fb_format = ((GRPH_DEPTH_16BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB565 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN16);
+ fb_swap = (GRPH_ENDIAN_8IN16 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_ARGB8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
case DRM_FORMAT_XRGB2101010:
case DRM_FORMAT_ARGB2101010:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB2101010));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB2101010 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_BGRX1010102:
case DRM_FORMAT_BGRA1010102:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_BGRA1010102));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_BGRA1010102 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap = GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap = (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
- fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
- GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
- fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
- GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
#ifdef __BIG_ENDIAN
- fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
#endif
break;
default:
@@ -1962,18 +2019,18 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT);
num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- fb_format |= GRPH_NUM_BANKS(num_banks);
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_2D_TILED_THIN1);
- fb_format |= GRPH_TILE_SPLIT(tile_split);
- fb_format |= GRPH_BANK_WIDTH(bankw);
- fb_format |= GRPH_BANK_HEIGHT(bankh);
- fb_format |= GRPH_MACRO_TILE_ASPECT(mtaspect);
+ fb_format |= (num_banks << GRPH_CONTROL__GRPH_NUM_BANKS__SHIFT);
+ fb_format |= (GRPH_ARRAY_2D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
+ fb_format |= (tile_split << GRPH_CONTROL__GRPH_TILE_SPLIT__SHIFT);
+ fb_format |= (bankw << GRPH_CONTROL__GRPH_BANK_WIDTH__SHIFT);
+ fb_format |= (bankh << GRPH_CONTROL__GRPH_BANK_HEIGHT__SHIFT);
+ fb_format |= (mtaspect << GRPH_CONTROL__GRPH_MACRO_TILE_ASPECT__SHIFT);
} else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == ARRAY_1D_TILED_THIN1) {
- fb_format |= GRPH_ARRAY_MODE(GRPH_ARRAY_1D_TILED_THIN1);
+ fb_format |= (GRPH_ARRAY_1D_TILED_THIN1 << GRPH_CONTROL__GRPH_ARRAY_MODE__SHIFT);
}
pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- fb_format |= GRPH_PIPE_CONFIG(pipe_config);
+ fb_format |= (pipe_config << GRPH_CONTROL__GRPH_PIPE_CONFIG__SHIFT);
dce_v6_0_vga_enable(crtc, false);
@@ -1989,7 +2046,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
(u32)fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
- (u32) fb_location & GRPH_PRIMARY_SURFACE_ADDRESS__GRPH_PRIMARY_SURFACE_ADDRESS_MASK);
+ (u32) fb_location & GRPH_SECONDARY_SURFACE_ADDRESS__GRPH_SECONDARY_SURFACE_ADDRESS_MASK);
WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
WREG32(mmGRPH_SWAP_CNTL + amdgpu_crtc->crtc_offset, fb_swap);
@@ -2057,14 +2114,13 @@ static void dce_v6_0_set_interleave(struct drm_crtc *crtc,
if (mode->flags & DRM_MODE_FLAG_INTERLACE)
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset,
- INTERLEAVE_EN);
+ DATA_FORMAT__INTERLEAVE_EN_MASK);
else
WREG32(mmDATA_FORMAT + amdgpu_crtc->crtc_offset, 0);
}
static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -2074,15 +2130,15 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id);
WREG32(mmINPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
+ ((INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_GRPH_MODE__SHIFT) |
+ (INPUT_CSC_BYPASS << INPUT_CSC_CONTROL__INPUT_CSC_OVL_MODE__SHIFT)));
WREG32(mmPRESCALE_GRPH_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_GRPH_CONTROL__GRPH_PRESCALE_BYPASS_MASK);
WREG32(mmPRESCALE_OVL_CONTROL + amdgpu_crtc->crtc_offset,
PRESCALE_OVL_CONTROL__OVL_PRESCALE_BYPASS_MASK);
WREG32(mmINPUT_GAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
- (0 << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
+ ((INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__GRPH_INPUT_GAMMA_MODE__SHIFT) |
+ (INPUT_GAMMA_USE_LUT << INPUT_GAMMA_CONTROL__OVL_INPUT_GAMMA_MODE__SHIFT)));
WREG32(mmDC_LUT_CONTROL + amdgpu_crtc->crtc_offset, 0);
@@ -2109,19 +2165,19 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc)
}
WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
- (0 << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
- ICON_DEGAMMA_MODE(0) |
- (0 << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
+ ((DEGAMMA_BYPASS << DEGAMMA_CONTROL__GRPH_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__OVL_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__ICON_DEGAMMA_MODE__SHIFT) |
+ (DEGAMMA_BYPASS << DEGAMMA_CONTROL__CURSOR_DEGAMMA_MODE__SHIFT)));
WREG32(mmGAMUT_REMAP_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
- (0 << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
+ ((GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__GRPH_GAMUT_REMAP_MODE__SHIFT) |
+ (GAMUT_REMAP_BYPASS << GAMUT_REMAP_CONTROL__OVL_GAMUT_REMAP_MODE__SHIFT)));
WREG32(mmREGAMMA_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
- (0 << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
+ ((REGAMMA_BYPASS << REGAMMA_CONTROL__GRPH_REGAMMA_MODE__SHIFT) |
+ (REGAMMA_BYPASS << REGAMMA_CONTROL__OVL_REGAMMA_MODE__SHIFT)));
WREG32(mmOUTPUT_CSC_CONTROL + amdgpu_crtc->crtc_offset,
- ((0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
- (0 << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
+ ((OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_GRPH_MODE__SHIFT) |
+ (OUTPUT_CSC_BYPASS << OUTPUT_CSC_CONTROL__OUTPUT_CSC_OVL_MODE__SHIFT)));
/* XXX match this to the depth of the crtc fmt block, move to modeset? */
WREG32(0x1a50 + amdgpu_crtc->crtc_offset, 0);
@@ -2216,8 +2272,6 @@ static void dce_v6_0_hide_cursor(struct drm_crtc *crtc)
WREG32(mmCUR_CONTROL + amdgpu_crtc->crtc_offset,
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
-
}
static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
@@ -2234,7 +2288,6 @@ static void dce_v6_0_show_cursor(struct drm_crtc *crtc)
CUR_CONTROL__CURSOR_EN_MASK |
(CURSOR_24_8_PRE_MULT << CUR_CONTROL__CURSOR_MODE__SHIFT) |
(CURSOR_URGENT_1_2 << CUR_CONTROL__CURSOR_URGENT_CONTROL__SHIFT));
-
}
static int dce_v6_0_cursor_move_locked(struct drm_crtc *crtc,
@@ -2545,7 +2598,6 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_encoder *encoder;
@@ -2602,6 +2654,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v6_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+
+}
+
+static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v6_0_panic_flush,
+};
+
static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2629,13 +2707,14 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v6_0_early_init(void *handle)
+static int dce_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v6_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg;
@@ -2664,11 +2743,10 @@ static int dce_v6_0_early_init(void *handle)
return 0;
}
-static int dce_v6_0_sw_init(void *handle)
+static int dce_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- bool ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2711,8 +2789,7 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
- ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
- if (ret)
+ if (amdgpu_atombios_get_connector_info_from_object_table(adev))
amdgpu_display_print_display_setup(adev_to_drm(adev));
else
return -EINVAL;
@@ -2743,9 +2820,9 @@ static int dce_v6_0_sw_init(void *handle)
return r;
}
-static int dce_v6_0_sw_fini(void *handle)
+static int dce_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
drm_edid_free(adev->mode_info.bios_hardcoded_edid);
@@ -2760,10 +2837,10 @@ static int dce_v6_0_sw_fini(void *handle)
return 0;
}
-static int dce_v6_0_hw_init(void *handle)
+static int dce_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v6_0_set_vga_render_state(adev, false);
@@ -2783,10 +2860,10 @@ static int dce_v6_0_hw_init(void *handle)
return 0;
}
-static int dce_v6_0_hw_fini(void *handle)
+static int dce_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v6_0_hpd_fini(adev);
@@ -2801,9 +2878,9 @@ static int dce_v6_0_hw_fini(void *handle)
return 0;
}
-static int dce_v6_0_suspend(void *handle)
+static int dce_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2812,18 +2889,18 @@ static int dce_v6_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v6_0_hw_fini(handle);
+ return dce_v6_0_hw_fini(ip_block);
}
-static int dce_v6_0_resume(void *handle)
+static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v6_0_hw_init(handle);
+ ret = dce_v6_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2838,19 +2915,35 @@ static int dce_v6_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v6_0_is_idle(void *handle)
+static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v6_0_wait_for_idle(void *handle)
+static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
+ u32 srbm_soft_reset = 0, tmp;
+ struct amdgpu_device *adev = ip_block->adev;
-static int dce_v6_0_soft_reset(void *handle)
-{
- DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n");
+ if (dce_v6_0_is_display_hung(adev))
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
+
+ if (srbm_soft_reset) {
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+ tmp |= srbm_soft_reset;
+ dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~srbm_soft_reset;
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
return 0;
}
@@ -2867,22 +2960,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (crtc) {
case 0:
- reg_block = SI_CRTC0_REGISTER_OFFSET;
+ reg_block = CRTC0_REGISTER_OFFSET;
break;
case 1:
- reg_block = SI_CRTC1_REGISTER_OFFSET;
+ reg_block = CRTC1_REGISTER_OFFSET;
break;
case 2:
- reg_block = SI_CRTC2_REGISTER_OFFSET;
+ reg_block = CRTC2_REGISTER_OFFSET;
break;
case 3:
- reg_block = SI_CRTC3_REGISTER_OFFSET;
+ reg_block = CRTC3_REGISTER_OFFSET;
break;
case 4:
- reg_block = SI_CRTC4_REGISTER_OFFSET;
+ reg_block = CRTC4_REGISTER_OFFSET;
break;
case 5:
- reg_block = SI_CRTC5_REGISTER_OFFSET;
+ reg_block = CRTC5_REGISTER_OFFSET;
break;
default:
DRM_DEBUG("invalid crtc %d\n", crtc);
@@ -2892,12 +2985,12 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev,
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask &= ~VBLANK_INT_MASK;
+ interrupt_mask &= ~INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
case AMDGPU_IRQ_STATE_ENABLE:
interrupt_mask = RREG32(mmINT_MASK + reg_block);
- interrupt_mask |= VBLANK_INT_MASK;
+ interrupt_mask |= INT_MASK__VBLANK_INT_MASK;
WREG32(mmINT_MASK + reg_block, interrupt_mask);
break;
default:
@@ -2912,28 +3005,28 @@ static void dce_v6_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
-static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
- unsigned type,
+ unsigned hpd,
enum amdgpu_interrupt_state state)
{
u32 dc_hpd_int_cntl;
- if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ if (hpd >= adev->mode_info.num_hpd) {
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return 0;
}
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl &= ~DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl &= ~DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type]);
- dc_hpd_int_cntl |= DC_HPDx_INT_EN;
- WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[type], dc_hpd_int_cntl);
+ dc_hpd_int_cntl = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]);
+ dc_hpd_int_cntl |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_EN_MASK;
+ WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], dc_hpd_int_cntl);
break;
default:
break;
@@ -2942,7 +3035,7 @@ static int dce_v6_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3002,7 +3095,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
switch (entry->src_data[0]) {
case 0: /* vblank */
if (disp_int & interrupt_status_offsets[crtc].vblank)
- WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK);
+ WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_STATUS__VBLANK_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -3013,7 +3106,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
break;
case 1: /* vline */
if (disp_int & interrupt_status_offsets[crtc].vline)
- WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_ACK);
+ WREG32(mmVLINE_STATUS + crtc_offsets[crtc], VLINE_STATUS__VLINE_ACK_MASK);
else
DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
@@ -3027,7 +3120,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v6_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v6_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3078,7 +3171,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
works = amdgpu_crtc->pflip_works;
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != "
"AMDGPU_FLIP_SUBMITTED(%d)\n",
amdgpu_crtc->pflip_status,
@@ -3126,16 +3219,15 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
}
return 0;
-
}
-static int dce_v6_0_set_clockgating_state(void *handle,
+static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v6_0_set_powergating_state(void *handle,
+static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3144,7 +3236,6 @@ static int dce_v6_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.name = "dce_v6_0",
.early_init = dce_v6_0_early_init,
- .late_init = NULL,
.sw_init = dce_v6_0_sw_init,
.sw_fini = dce_v6_0_sw_fini,
.hw_init = dce_v6_0_hw_init,
@@ -3152,20 +3243,15 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.suspend = dce_v6_0_suspend,
.resume = dce_v6_0_resume,
.is_idle = dce_v6_0_is_idle,
- .wait_for_idle = dce_v6_0_wait_for_idle,
.soft_reset = dce_v6_0_soft_reset,
.set_clockgating_state = dce_v6_0_set_clockgating_state,
.set_powergating_state = dce_v6_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
-static void
-dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3185,7 +3271,6 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder,
static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
{
-
struct amdgpu_device *adev = drm_to_adev(encoder->dev);
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
@@ -3225,7 +3310,6 @@ static void dce_v6_0_encoder_prepare(struct drm_encoder *encoder)
static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
{
-
struct drm_device *dev = encoder->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
@@ -3236,7 +3320,6 @@ static void dce_v6_0_encoder_commit(struct drm_encoder *encoder)
static void dce_v6_0_encoder_disable(struct drm_encoder *encoder)
{
-
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig;
int em = amdgpu_atombios_encoder_get_encoder_mode(encoder);
@@ -3263,8 +3346,7 @@ static void dce_v6_0_ext_commit(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
+static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
@@ -3276,8 +3358,7 @@ static void dce_v6_0_ext_disable(struct drm_encoder *encoder)
}
-static void
-dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
+static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode)
{
}
@@ -3348,7 +3429,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
amdgpu_encoder->devices |= supported_device;
return;
}
-
}
/* add a new one */
@@ -3455,17 +3535,17 @@ static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
- .set = dce_v6_0_set_crtc_interrupt_state,
+ .set = dce_v6_0_set_crtc_irq_state,
.process = dce_v6_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_pageflip_irq_funcs = {
- .set = dce_v6_0_set_pageflip_interrupt_state,
+ .set = dce_v6_0_set_pageflip_irq_state,
.process = dce_v6_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = {
- .set = dce_v6_0_set_hpd_interrupt_state,
+ .set = dce_v6_0_set_hpd_irq_state,
.process = dce_v6_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 77ac3f114d24..e62ccf9eb73d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -271,7 +271,7 @@ static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev,
u32 tmp;
if (hpd >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", hpd);
+ DRM_DEBUG("invalid hpd %d\n", hpd);
return;
}
@@ -1395,13 +1395,13 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev,
}
static const u32 pin_offsets[7] = {
- (0x1780 - 0x1780),
- (0x1786 - 0x1780),
- (0x178c - 0x1780),
- (0x1792 - 0x1780),
- (0x1798 - 0x1780),
- (0x179d - 0x1780),
- (0x17a4 - 0x1780),
+ AUD0_REGISTER_OFFSET,
+ AUD1_REGISTER_OFFSET,
+ AUD2_REGISTER_OFFSET,
+ AUD3_REGISTER_OFFSET,
+ AUD4_REGISTER_OFFSET,
+ AUD5_REGISTER_OFFSET,
+ AUD6_REGISTER_OFFSET,
};
static int dce_v8_0_audio_init(struct amdgpu_device *adev)
@@ -2613,6 +2613,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = {
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
+static void dce_v8_0_panic_flush(struct drm_plane *plane)
+{
+ struct drm_framebuffer *fb;
+ struct amdgpu_crtc *amdgpu_crtc;
+ struct amdgpu_device *adev;
+ uint32_t fb_format;
+
+ if (!plane->fb)
+ return;
+
+ fb = plane->fb;
+ amdgpu_crtc = to_amdgpu_crtc(plane->crtc);
+ adev = drm_to_adev(fb->dev);
+
+ /* Disable DC tiling */
+ fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset);
+ fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK;
+ WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format);
+}
+
+static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = {
+ .get_scanout_buffer = amdgpu_display_get_scanout_buffer,
+ .panic_flush = dce_v8_0_panic_flush,
+};
+
static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
{
struct amdgpu_crtc *amdgpu_crtc;
@@ -2640,13 +2665,14 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
amdgpu_crtc->encoder = NULL;
amdgpu_crtc->connector = NULL;
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs);
+ drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs);
return 0;
}
-static int dce_v8_0_early_init(void *handle)
+static int dce_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->audio_endpt_rreg = &dce_v8_0_audio_endpt_rreg;
adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg;
@@ -2680,10 +2706,10 @@ static int dce_v8_0_early_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_init(void *handle)
+static int dce_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
@@ -2764,9 +2790,9 @@ static int dce_v8_0_sw_init(void *handle)
return 0;
}
-static int dce_v8_0_sw_fini(void *handle)
+static int dce_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
drm_edid_free(adev->mode_info.bios_hardcoded_edid);
@@ -2782,10 +2808,10 @@ static int dce_v8_0_sw_fini(void *handle)
return 0;
}
-static int dce_v8_0_hw_init(void *handle)
+static int dce_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* disable vga render */
dce_v8_0_set_vga_render_state(adev, false);
@@ -2805,10 +2831,10 @@ static int dce_v8_0_hw_init(void *handle)
return 0;
}
-static int dce_v8_0_hw_fini(void *handle)
+static int dce_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
dce_v8_0_hpd_fini(adev);
@@ -2823,9 +2849,9 @@ static int dce_v8_0_hw_fini(void *handle)
return 0;
}
-static int dce_v8_0_suspend(void *handle)
+static int dce_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_display_suspend_helper(adev);
@@ -2835,18 +2861,18 @@ static int dce_v8_0_suspend(void *handle)
adev->mode_info.bl_level =
amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
- return dce_v8_0_hw_fini(handle);
+ return dce_v8_0_hw_fini(ip_block);
}
-static int dce_v8_0_resume(void *handle)
+static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
adev->mode_info.bl_level);
- ret = dce_v8_0_hw_init(handle);
+ ret = dce_v8_0_hw_init(ip_block);
/* turn on the BL */
if (adev->mode_info.bl_encoder) {
@@ -2861,20 +2887,15 @@ static int dce_v8_0_resume(void *handle)
return amdgpu_display_resume_helper(adev);
}
-static bool dce_v8_0_is_idle(void *handle)
+static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int dce_v8_0_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int dce_v8_0_soft_reset(void *handle)
+static int dce_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0, tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (dce_v8_0_is_display_hung(adev))
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
@@ -3000,7 +3021,7 @@ static void dce_v8_0_set_crtc_vline_interrupt_state(struct amdgpu_device *adev,
}
}
-static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_hpd_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3008,7 +3029,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
u32 dc_hpd_int_cntl;
if (type >= adev->mode_info.num_hpd) {
- DRM_DEBUG("invalid hdp %d\n", type);
+ DRM_DEBUG("invalid hpd %d\n", type);
return 0;
}
@@ -3030,7 +3051,7 @@ static int dce_v8_0_set_hpd_interrupt_state(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_crtc_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_crtc_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3115,7 +3136,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
return 0;
}
-static int dce_v8_0_set_pageflip_interrupt_state(struct amdgpu_device *adev,
+static int dce_v8_0_set_pageflip_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
enum amdgpu_interrupt_state state)
@@ -3217,13 +3238,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
}
-static int dce_v8_0_set_clockgating_state(void *handle,
+static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int dce_v8_0_set_powergating_state(void *handle,
+static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -3232,7 +3253,6 @@ static int dce_v8_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.name = "dce_v8_0",
.early_init = dce_v8_0_early_init,
- .late_init = NULL,
.sw_init = dce_v8_0_sw_init,
.sw_fini = dce_v8_0_sw_fini,
.hw_init = dce_v8_0_hw_init,
@@ -3240,12 +3260,9 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.suspend = dce_v8_0_suspend,
.resume = dce_v8_0_resume,
.is_idle = dce_v8_0_is_idle,
- .wait_for_idle = dce_v8_0_wait_for_idle,
.soft_reset = dce_v8_0_soft_reset,
.set_clockgating_state = dce_v8_0_set_clockgating_state,
.set_powergating_state = dce_v8_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static void
@@ -3530,17 +3547,17 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
}
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
- .set = dce_v8_0_set_crtc_interrupt_state,
+ .set = dce_v8_0_set_crtc_irq_state,
.process = dce_v8_0_crtc_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_pageflip_irq_funcs = {
- .set = dce_v8_0_set_pageflip_interrupt_state,
+ .set = dce_v8_0_set_pageflip_irq_state,
.process = dce_v8_0_pageflip_irq,
};
static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = {
- .set = dce_v8_0_set_hpd_interrupt_state,
+ .set = dce_v8_0_set_hpd_irq_state,
.process = dce_v8_0_hpd_irq,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 483a441b46aa..621aeca53880 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -254,8 +254,8 @@ static void df_v3_6_sw_init(struct amdgpu_device *adev)
static void df_v3_6_sw_fini(struct amdgpu_device *adev)
{
-
- device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (adev->dev->kobj.sd)
+ device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 45ed97038df0..75ea071744eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,11 +40,11 @@
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
#include "soc15.h"
-#include "soc15d.h"
#include "soc15_common.h"
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
+#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"
/*
@@ -368,11 +368,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
/* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
@@ -421,7 +416,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
@@ -448,7 +452,32 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI)
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI),
+ /* gfx header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
};
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
@@ -3673,17 +3702,23 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid);
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -3783,12 +3818,65 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}
+static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
+ uint32_t xcc_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ unsigned i;
+ uint32_t tmp;
+
+ /* enter save mode */
+ amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "fail to wait on hqd deactive\n");
+ } else if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout)
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ } else {
+ dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
+ }
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* exit safe mode */
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+}
+
static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
.kiq_set_resources = gfx10_kiq_set_resources,
.kiq_map_queues = gfx10_kiq_map_queues,
.kiq_unmap_queues = gfx10_kiq_unmap_queues,
.kiq_query_status = gfx10_kiq_query_status,
.kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
+ .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue,
.set_resources_size = 8,
.map_queues_size = 7,
.unmap_queues_size = 6,
@@ -4030,7 +4118,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -4132,18 +4220,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me%s.bin", ucode_prefix, wks);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4167,6 +4258,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec%s.bin", ucode_prefix, wks);
if (err)
goto out;
@@ -4174,6 +4266,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2%s.bin", ucode_prefix, wks);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
@@ -4232,9 +4325,7 @@ static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -4242,39 +4333,15 @@ static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
@@ -4683,11 +4750,14 @@ static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
}
}
-static int gfx_v10_0_sw_init(void *handle)
+static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
int xcc_id = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
@@ -4697,7 +4767,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 1, 4):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 8;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
@@ -4712,7 +4782,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 2;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -4726,6 +4796,74 @@ static int gfx_v10_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(10, 1, 10):
+ case IP_VERSION(10, 1, 1):
+ case IP_VERSION(10, 1, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 101 &&
+ adev->gfx.pfp_fw_version >= 158 &&
+ adev->gfx.mec_fw_version >= 151) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 0):
+ case IP_VERSION(10, 3, 1):
+ case IP_VERSION(10, 3, 2):
+ case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 4):
+ case IP_VERSION(10, 3, 5):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 64 &&
+ adev->gfx.pfp_fw_version >= 100 &&
+ adev->gfx.mec_fw_version >= 122) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 6):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 14 &&
+ adev->gfx.pfp_fw_version >= 17 &&
+ adev->gfx.mec_fw_version >= 24) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(10, 3, 7):
+ adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 4 &&
+ adev->gfx.pfp_fw_version >= 9 &&
+ adev->gfx.mec_fw_version >= 12) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
/* KIQ event */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
@@ -4782,7 +4920,7 @@ static int gfx_v10_0_sw_init(void *handle)
/* set up the gfx ring */
for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
continue;
@@ -4814,6 +4952,11 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
}
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
@@ -4842,6 +4985,10 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_alloc_ip_dump(adev);
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -4866,10 +5013,10 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_ptr);
}
-static int gfx_v10_0_sw_fini(void *handle)
+static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4881,6 +5028,8 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
amdgpu_gfx_kiq_fini(adev, 0);
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
gfx_v10_0_me_fini(adev);
@@ -4891,6 +5040,7 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
gfx_v10_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
@@ -5929,7 +6079,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
else
WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
- if (adev->job_hang && !enable)
+ if (amdgpu_in_reset(adev) && !enable)
return 0;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -5998,7 +6148,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -6076,7 +6226,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@@ -6153,7 +6303,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -6374,7 +6524,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6412,7 +6562,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -6528,7 +6678,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -6576,17 +6726,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80);
break;
default:
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
break;
}
}
@@ -6739,22 +6885,9 @@ static int gfx_v10_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kgq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -7061,55 +7194,24 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v10_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v10_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v10_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(&adev->gfx.compute_ring[i],
+ false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
@@ -7366,14 +7468,17 @@ static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data);
}
-static int gfx_v10_0_hw_init(void *handle)
+static int gfx_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!amdgpu_emu_mode)
gfx_v10_0_init_golden_registers(adev);
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
/**
* For gfx 10, rlc firmware loading relies on smu firmware is
@@ -7418,9 +7523,11 @@ static int gfx_v10_0_hw_init(void *handle)
return r;
}
-static int gfx_v10_0_hw_fini(void *handle)
+static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -7431,7 +7538,7 @@ static int gfx_v10_0_hw_fini(void *handle)
* otherwise the gfxoff disallowing will be failed to set.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1))
- gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE);
+ gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -7456,19 +7563,19 @@ static int gfx_v10_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v10_0_suspend(void *handle)
+static int gfx_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_fini(handle);
+ return gfx_v10_0_hw_fini(ip_block);
}
-static int gfx_v10_0_resume(void *handle)
+static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v10_0_hw_init(handle);
+ return gfx_v10_0_hw_init(ip_block);
}
-static bool gfx_v10_0_is_idle(void *handle)
+static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -7477,11 +7584,11 @@ static bool gfx_v10_0_is_idle(void *handle)
return true;
}
-static int gfx_v10_0_wait_for_idle(void *handle)
+static int gfx_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -7495,11 +7602,11 @@ static int gfx_v10_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v10_0_soft_reset(void *handle)
+static int gfx_v10_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -7678,9 +7785,9 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v10_0_early_init(void *handle)
+static int gfx_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
@@ -7722,9 +7829,9 @@ static int gfx_v10_0_early_init(void *handle)
return gfx_v10_0_init_microcode(adev);
}
-static int gfx_v10_0_late_init(void *handle)
+static int gfx_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -8319,10 +8426,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = {
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
-static int gfx_v10_0_set_powergating_state(void *handle,
+static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -8357,10 +8464,10 @@ static int gfx_v10_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v10_0_set_clockgating_state(void *handle,
+static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -8386,9 +8493,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_FGCG */
@@ -9402,8 +9509,6 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- int i;
-
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
@@ -9414,8 +9519,7 @@ static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
- for (i = 1; i < num_nop; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
}
static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
@@ -9465,20 +9569,9 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
if (r)
return r;
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- DRM_ERROR("fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kgq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kgq_init_queue(ring, true);
if (r) {
- DRM_ERROR("fail to unresv mqd_obj\n");
+ DRM_ERROR("fail to init kgq\n");
return r;
}
@@ -9535,20 +9628,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v10_0_kcq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v10_0_kcq_init_queue(ring, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "fail to init kcq\n");
return r;
}
@@ -9568,9 +9650,9 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
return amdgpu_ring_test_ring(ring);
}
-static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
+static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
@@ -9597,9 +9679,14 @@ static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
- drm_printf(p, "%-50s \t 0x%08x\n",
- gc_cp_reg_list_10[reg].reg_name,
- adev->gfx.ip_dump_compute_queues[index + reg]);
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_10[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
@@ -9632,9 +9719,9 @@ static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
}
}
-static void gfx_v10_ip_dump(void *handle)
+static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
@@ -9660,9 +9747,13 @@ static void gfx_v10_ip_dump(void *handle)
nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
- adev->gfx.ip_dump_compute_queues[index + reg] =
- RREG32(SOC15_REG_ENTRY_OFFSET(
- gc_cp_reg_list_10[reg]));
+ if (i && gc_cp_reg_list_10[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_10[reg]));
}
index += reg_count;
}
@@ -9699,6 +9790,27 @@ static void gfx_v10_ip_dump(void *handle)
amdgpu_gfx_off_ctrl(adev, true);
}
+static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9749,7 +9861,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
2 + /* SWITCH_BUFFER */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */
.emit_ib = gfx_v10_0_ring_emit_ib_gfx,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9772,6 +9885,9 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -9791,7 +9907,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v10_0_emit_mem_sync */
+ 8 + /* gfx_v10_0_emit_mem_sync */
+ 2, /* gfx_v10_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
.emit_fence = gfx_v10_0_ring_emit_fence,
@@ -9809,6 +9926,9 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.soft_recovery = gfx_v10_0_ring_soft_recovery,
.emit_mem_sync = gfx_v10_0_emit_mem_sync,
.reset = gfx_v10_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v10_0_ring_begin_use,
+ .end_use = gfx_v10_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
new file mode 100644
index 000000000000..f67569ccf9f6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_10_1_10 */
+static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbf068100, 0xbf840023,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020102,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
+
+/* Define the cleaner shader gfx_10_3_0 */
+static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
+ 0xb0804004, 0xbf8a0000,
+ 0xbe8203b8, 0xbefc0380,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefc0302, 0x80828802,
+ 0xbf84fff5, 0xbe8203ff,
+ 0x80000000, 0x87020002,
+ 0xbf840012, 0xbefe03c1,
+ 0xbeff03c1, 0xd7650001,
+ 0x0001007f, 0xd7660001,
+ 0x0002027e, 0x16020288,
+ 0xbe8203bf, 0xbefc03c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd70f6a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbf84fff7, 0xbefc03ff,
+ 0x00000068, 0xbe803080,
+ 0xbe813080, 0xbe823080,
+ 0xbe833080, 0x80fc847c,
+ 0xbf84fffa, 0xbeea0480,
+ 0xbeec0480, 0xbeee0480,
+ 0xbef00480, 0xbef20480,
+ 0xbef40480, 0xbef60480,
+ 0xbef80480, 0xbefa0480,
+ 0xbf810000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
new file mode 100644
index 000000000000..54f7ed9e2801
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 256 Dwords cleaner shader.
+
+// GFX10.1 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_0
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1)
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
new file mode 100644
index 000000000000..0e1c246166c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// GFX10.3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+
+shader main
+ asic(GFX10)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index d3e8be82a172..ec9b84f92d46 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -29,7 +29,6 @@
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
-#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"
@@ -42,13 +41,15 @@
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
#include "soc15.h"
-#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
+#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048
@@ -63,10 +64,28 @@
#define regPC_CONFIG_CNTL_1 0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX 1
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
@@ -97,6 +116,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
@@ -157,9 +180,13 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
/* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
@@ -210,7 +237,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
@@ -239,7 +275,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};
static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
@@ -293,14 +346,20 @@ static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ u64 shader_mc_addr;
+
+ /* Cleaner shader MC address */
+ shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
+
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
- amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
amdgpu_ring_write(kiq_ring, 0); /* oac mask */
amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}
@@ -483,8 +542,6 @@ static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- int i;
-
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
@@ -495,8 +552,7 @@ static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
- for (i = 1; i < num_nop; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
}
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
@@ -557,33 +613,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -610,12 +651,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -635,6 +674,7 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *
int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -684,6 +724,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
@@ -701,6 +742,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
@@ -716,9 +758,15 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
adev->pdev->revision == 0xCE)
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/gc_11_0_0_rlc_1.bin");
+ else if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
@@ -731,6 +779,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
@@ -804,9 +853,7 @@ static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
int ctx_reg_offset;
if (adev->gfx.rlc.cs_data == NULL)
@@ -814,39 +861,15 @@ static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- ctx_reg_offset =
- SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(ctx_reg_offset);
buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
@@ -1027,14 +1050,21 @@ static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
#define MQD_FWWORKAREA_SIZE 484
#define MQD_FWWORKAREA_ALIGNMENT 256
-static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
struct amdgpu_gfx_shadow_info *shadow_info)
{
- if (adev->gfx.cp_gfx_shadow) {
- shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
- shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
- shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
- shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
return 0;
} else {
memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
@@ -1077,6 +1107,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1106,6 +1137,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
ring->ring_obj = NULL;
ring->use_doorbell = true;
+ if (adev->gfx.disable_kq) {
+ ring->no_scheduler = true;
+ ring->no_user_submission = true;
+ }
if (!ring_id)
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
@@ -1536,31 +1571,28 @@ static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
}
}
-static int gfx_v11_0_sw_init(void *handle)
+static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- int i, j, k, r, ring_id = 0;
+ int i, j, k, r, ring_id;
int xcc_id = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
- adev->gfx.mec.num_mec = 2;
- adev->gfx.mec.num_pipe_per_mec = 4;
- adev->gfx.mec.num_queue_per_pipe = 4;
- break;
- case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 4):
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.me.num_queue_per_pipe = 2;
adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 4;
@@ -1575,6 +1607,100 @@ static int gfx_v11_0_sw_init(void *handle)
break;
}
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2390 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2600 &&
+ adev->mes.fw_version[0] >= 120) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ case IP_VERSION(11, 0, 1):
+ case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
+ /* add firmware version checks here */
+ if (0 && !adev->gfx.disable_uq) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 2280 &&
+ adev->gfx.pfp_fw_version >= 2370 &&
+ adev->gfx.mec_fw_version >= 2450 &&
+ adev->mes.fw_version[0] >= 99) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 1):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 26 &&
+ adev->mes.fw_version[0] >= 114) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 12 &&
+ adev->gfx.pfp_fw_version >= 15 &&
+ adev->gfx.mec_fw_version >= 15) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(11, 5, 3):
+ adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 7 &&
+ adev->gfx.pfp_fw_version >= 8 &&
+ adev->gfx.mec_fw_version >= 8) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
amdgpu_sriov_is_pp_one_vf(adev))
@@ -1631,41 +1757,64 @@ static int gfx_v11_0_sw_init(void *handle)
return r;
}
- /* set up the gfx ring */
- for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
- continue;
-
- r = gfx_v11_0_gfx_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
- ring_id++;
+ if (adev->gfx.num_gfx_rings) {
+ ring_id = 0;
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v11_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
}
}
}
- ring_id = 0;
- /* set up the compute queues - allocate horizontally across pipes */
- for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
- for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
- k, j))
- continue;
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
+ k, j))
+ continue;
- r = gfx_v11_0_compute_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
+ r = gfx_v11_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
- ring_id++;
+ ring_id++;
+ }
}
}
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ if ((adev->gfx.me_fw_version >= 2280) &&
+ (adev->gfx.mec_fw_version >= 2410)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ break;
+ default:
+ break;
+ }
+
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
if (r) {
@@ -1700,6 +1849,10 @@ static int gfx_v11_0_sw_init(void *handle)
gfx_v11_0_alloc_ip_dump(adev);
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -1732,10 +1885,10 @@ static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}
-static int gfx_v11_0_sw_fini(void *handle)
+static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -1749,6 +1902,8 @@ static int gfx_v11_0_sw_fini(void *handle)
amdgpu_gfx_kiq_fini(adev, 0);
}
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v11_0_pfp_fini(adev);
gfx_v11_0_me_fini(adev);
gfx_v11_0_rlc_fini(adev);
@@ -1759,6 +1914,8 @@ static int gfx_v11_0_sw_fini(void *handle)
gfx_v11_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
kfree(adev->gfx.ip_dump_gfx_queues);
@@ -1832,6 +1989,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1849,9 +2007,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
active_rb_bitmap &= global_active_rb_bitmap;
@@ -1893,8 +2053,10 @@ static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
@@ -2327,7 +2489,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@@ -2371,7 +2533,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@@ -2416,7 +2578,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@@ -2862,7 +3024,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
IP_VERSION(11, 0, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2))
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
bootload_status = RREG32_SOC15(GC, 0,
regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
else
@@ -3051,7 +3214,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@@ -3269,7 +3432,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@@ -3555,7 +3718,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3593,7 +3756,7 @@ static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
+ /* Set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -3863,9 +4026,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -3896,7 +4057,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
priority = 1;
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
mqd->cp_gfx_hqd_queue_priority = tmp;
}
@@ -3918,14 +4079,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
mqd->cp_gfx_mqd_control = tmp;
/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
@@ -3933,7 +4094,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
mqd->cp_gfx_hqd_quantum = tmp;
@@ -3955,16 +4116,18 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
mqd->cp_gfx_hqd_cntl = tmp;
/* set up cp_doorbell_control */
- tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3976,11 +4139,21 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_rb_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
/* active the queue */
mqd->cp_gfx_hqd_active = 1;
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+ mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -4015,22 +4188,9 @@ static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
int r, i;
- struct amdgpu_ring *ring;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kgq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
return r;
}
@@ -4062,14 +4222,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
mqd->cp_hqd_eop_control = tmp;
/* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -4098,7 +4258,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ tmp = regCP_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
@@ -4108,7 +4268,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -4118,6 +4278,8 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
prop->allow_tunneling);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
@@ -4134,7 +4296,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = 0;
/* enable the doorbell if requested */
if (prop->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -4149,17 +4311,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
mqd->cp_hqd_persistent_state = tmp;
/* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
@@ -4169,6 +4331,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_active = prop->hqd_active;
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -4352,57 +4518,24 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v11_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
if (!amdgpu_async_gfx_ring)
gfx_v11_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kcq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
@@ -4455,11 +4588,23 @@ static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- r = amdgpu_ring_test_helper(ring);
- if (r)
- return r;
+ if (adev->gfx.disable_kq) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we don't want to set ring->ready */
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ return r;
+ }
+ if (amdgpu_async_gfx_ring)
+ amdgpu_gfx_disable_kgq(adev, 0);
+ } else {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
@@ -4487,7 +4632,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
@@ -4568,10 +4713,13 @@ static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}
-static int gfx_v11_0_hw_init(void *handle)
+static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
+ adev->gfx.cleaner_shader_ptr);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
if (adev->gfx.imu.funcs) {
@@ -4665,16 +4813,63 @@ static int gfx_v11_0_hw_init(void *handle)
return r;
}
-static int gfx_v11_0_hw_fini(void *handle)
+static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v11_0_set_userq_eop_interrupts(adev, false);
if (!adev->no_hw_access) {
- if (amdgpu_async_gfx_ring) {
+ if (amdgpu_async_gfx_ring &&
+ !adev->gfx.disable_kq) {
if (amdgpu_gfx_disable_kgq(adev, 0))
DRM_ERROR("KGQ disable failed\n");
}
@@ -4703,19 +4898,19 @@ static int gfx_v11_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v11_0_suspend(void *handle)
+static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_fini(handle);
+ return gfx_v11_0_hw_fini(ip_block);
}
-static int gfx_v11_0_resume(void *handle)
+static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v11_0_hw_init(handle);
+ return gfx_v11_0_hw_init(ip_block);
}
-static bool gfx_v11_0_is_idle(void *handle)
+static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -4724,11 +4919,11 @@ static bool gfx_v11_0_is_idle(void *handle)
return true;
}
-static int gfx_v11_0_wait_for_idle(void *handle)
+static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4774,12 +4969,12 @@ int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v11_0_soft_reset(void *handle)
+static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
int r, i, j, k;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
@@ -4905,10 +5100,10 @@ static int gfx_v11_0_soft_reset(void *handle)
return gfx_v11_0_cp_resume(adev);
}
-static bool gfx_v11_0_check_soft_reset(void *handle)
+static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
long tmo = msecs_to_jiffies(1000);
@@ -4929,12 +5124,13 @@ static bool gfx_v11_0_check_soft_reset(void *handle)
return false;
}
-static int gfx_v11_0_post_soft_reset(void *handle)
+static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
/**
* GFX soft reset will impact MES, need resume MES when do GFX soft reset
*/
- return amdgpu_mes_resume((struct amdgpu_device *)handle);
+ return amdgpu_mes_resume(adev);
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
@@ -4995,15 +5191,40 @@ static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v11_0_early_init(void *handle)
+static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
- adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq) {
+ /* We need one GFX ring temporarily to set up
+ * the clear state.
+ */
+ adev->gfx.num_gfx_rings = 1;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
gfx_v11_0_set_kiq_pm4_funcs(adev);
gfx_v11_0_set_ring_funcs(adev);
@@ -5018,9 +5239,9 @@ static int gfx_v11_0_early_init(void *handle)
return gfx_v11_0_init_microcode(adev);
}
-static int gfx_v11_0_late_init(void *handle)
+static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -5034,6 +5255,11 @@ static int gfx_v11_0_late_init(void *handle)
r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
if (r)
return r;
+
+ r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -5382,6 +5608,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
break;
default:
@@ -5399,10 +5626,10 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
-static int gfx_v11_0_set_powergating_state(void *handle,
+static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5419,6 +5646,7 @@ static int gfx_v11_0_set_powergating_state(void *handle,
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
if (!enable)
amdgpu_gfx_off_ctrl(adev, false);
@@ -5435,10 +5663,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v11_0_set_clockgating_state(void *handle,
+static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -5452,6 +5680,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
break;
@@ -5462,9 +5691,9 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_MGCG */
@@ -5628,10 +5857,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -5651,10 +5876,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -5712,8 +5933,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -5741,10 +5961,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -5973,28 +6190,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v10_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v10_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v10_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -6233,25 +6435,23 @@ static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int i;
+ u32 doorbell_offset = entry->src_data[0];
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
+ int i;
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -6418,27 +6618,29 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -6546,6 +6748,69 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+
+static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
+ * so the pipe reset status relies on the later gfx ring test result.
+ */
+ return 0;
+}
+
static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
@@ -6555,23 +6820,17 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return -EINVAL;
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
- if (r)
- return r;
+ if (r) {
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kgq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v11_reset_gfx_pipe(ring);
+ if (r)
+ return r;
}
- amdgpu_bo_unreserve(ring->mqd_obj);
+
+ r = gfx_v11_0_kgq_init_queue(ring, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "failed to init kgq\n");
return r;
}
@@ -6584,50 +6843,155 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
-static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
+static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
+
struct amdgpu_device *adev = ring->adev;
- int i, r = 0;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
+ if (!gfx_v11_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ gfx_v11_0_set_safe_mode(adev, 0);
mutex_lock(&adev->srbm_mutex);
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
- WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
- /* make sure dequeue is complete*/
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
break;
- udelay(1);
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ if (ring->me == 1) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME2_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
}
- if (i >= adev->usec_timeout)
- r = -ETIMEDOUT;
+
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ gfx_v11_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
+ * reset status relies on the compute ring test result.
+ */
+ return 0;
+}
+
+static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
if (r) {
- dev_err(adev->dev, "fail to wait on hqd deactivate\n");
- return r;
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v11_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v11_0_kcq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v11_0_kcq_init_queue(ring, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "fail to init kcq\n");
return r;
}
r = amdgpu_mes_map_legacy_queue(adev, ring);
@@ -6639,9 +7003,9 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
-static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
+static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
@@ -6668,9 +7032,14 @@ static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
- drm_printf(p, "%-50s \t 0x%08x\n",
- gc_cp_reg_list_11[reg].reg_name,
- adev->gfx.ip_dump_compute_queues[index + reg]);
+ if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_11[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
@@ -6703,9 +7072,9 @@ static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
}
}
-static void gfx_v11_ip_dump(void *handle)
+static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
@@ -6730,9 +7099,16 @@ static void gfx_v11_ip_dump(void *handle)
/* ME0 is for GFX so start from 1 for CP */
soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
- adev->gfx.ip_dump_compute_queues[index + reg] =
- RREG32(SOC15_REG_ENTRY_OFFSET(
- gc_cp_reg_list_11[reg]));
+ if (i &&
+ gc_cp_reg_list_11[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0,
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_11[reg]));
}
index += reg_count;
}
@@ -6769,6 +7145,27 @@ static void gfx_v11_ip_dump(void *handle)
amdgpu_gfx_off_ctrl(adev, true);
}
+static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
+{
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.name = "gfx_v11_0",
.early_init = gfx_v11_0_early_init,
@@ -6818,7 +7215,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
5 + /* HDP_INVL */
22 + /* SET_Q_PREEMPTION_MODE */
8 + 8 + /* FENCE x2 */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6841,6 +7239,9 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
.reset = gfx_v11_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
@@ -6861,7 +7262,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v11_0_emit_mem_sync */
+ 8 + /* gfx_v11_0_emit_mem_sync */
+ 2, /* gfx_v11_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
.emit_fence = gfx_v11_0_ring_emit_fence,
@@ -6879,6 +7281,9 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
.reset = gfx_v11_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v11_0_ring_begin_use,
+ .end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
new file mode 100644
index 000000000000..9b90b66368c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// Navi3 : Clear SGPRs, VGPRs and LDS
+// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
+// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
+// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
+// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
+// Each wave clears SGPRs 0 - 107
+// Each wave clears VGPRs 0 - 63
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+
+shader main
+ asic(GFX11)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+// Takes about 2500 clocks to run.
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+
+ //
+ // CLEAR VGPRs
+ //
+ s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance)
+
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_sub_u32 m0, m0, 8
+ s_cbranch_scc0 label_0005
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
new file mode 100644
index 000000000000..3218cc04f543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Define the cleaner shader gfx_11_0_3 */
+static const u32 gfx_11_0_3_cleaner_shader_hex[] = {
+ 0xb0804006, 0xbe8200ff,
+ 0x00000058, 0xbefd0080,
+ 0x7e008480, 0x7e028480,
+ 0x7e048480, 0x7e068480,
+ 0x7e088480, 0x7e0a8480,
+ 0x7e0c8480, 0x7e0e8480,
+ 0xbefd0002, 0x80828802,
+ 0xbfa1fff5, 0xbe8200ff,
+ 0x80000000, 0x8b020002,
+ 0xbfa10012, 0xbefe00c1,
+ 0xbeff00c1, 0xd71f0001,
+ 0x0001007f, 0xd7200001,
+ 0x0002027e, 0x16020288,
+ 0xbe8200bf, 0xbefd00c1,
+ 0xd9382000, 0x00020201,
+ 0xd9386040, 0x00040401,
+ 0xd7006a01, 0x000202ff,
+ 0x00000400, 0x80828102,
+ 0xbfa1fff7, 0xbefd00ff,
+ 0x00000068, 0xbe804280,
+ 0xbe814280, 0xbe824280,
+ 0xbe834280, 0x80fd847d,
+ 0xbfa1fffa, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbfb00000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 47b47d21f464..1234c8d64e20 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -29,7 +29,6 @@
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
-#include "amdgpu_atomfirmware.h"
#include "imu_v12_0.h"
#include "soc24.h"
#include "nvd.h"
@@ -37,21 +36,40 @@
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "soc24_enum.h"
-#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
#include "soc15.h"
-#include "soc15d.h"
#include "clearstate_gfx12.h"
#include "v12_structs.h"
#include "gfx_v12_0.h"
#include "nbif_v6_3_1.h"
#include "mes_v12_0.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
#define GFX12_NUM_GFX_RINGS 1
#define GFX12_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
+#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
+#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000
+#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000
+
+#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_MQD_CONTROL_DEFAULT 0x00000100
+#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
+#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
+#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
+#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
+#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000
+
+
MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
@@ -117,11 +135,14 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
-
/* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
@@ -170,7 +191,16 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
@@ -199,7 +229,24 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+ /* cp header registers */
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};
static const struct soc15_reg_golden golden_settings_gc_12_0_rev0[] = {
@@ -459,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t padding, offset;
-
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- padding = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
-
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
- *cpu_ptr = cpu_to_le32(0xCAFEDEAD);
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r)
- return r;
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
+ return r;
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
- cpu_ptr = &adev->wb.wb[index];
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ cpu_ptr = &adev->wb.wb[index];
- r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
+ r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@@ -512,12 +544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
- if (!ring->is_mes_queue)
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -537,6 +567,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *
int err = 0;
err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_toc.bin", ucode_prefix);
if (err)
goto out;
@@ -566,6 +597,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", ucode_prefix);
if (err)
goto out;
@@ -573,6 +605,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", ucode_prefix);
if (err)
goto out;
@@ -581,6 +614,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", ucode_prefix);
if (err)
goto out;
@@ -593,6 +627,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", ucode_prefix);
if (err)
goto out;
@@ -860,6 +895,34 @@ static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
soc24_grbm_select(adev, me, pipe, q, vm);
}
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE 73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE 484
+#define MQD_FWWORKAREA_ALIGNMENT 256
+
+static void gfx_v12_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info)
+{
+ shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+ shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
+ shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+ shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+}
+
+static int gfx_v12_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+ struct amdgpu_gfx_shadow_info *shadow_info,
+ bool skip_check)
+{
+ if (adev->gfx.cp_gfx_shadow || skip_check) {
+ gfx_v12_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
+ return 0;
+ }
+
+ memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+ return -EINVAL;
+}
+
static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v12_0_select_se_sh,
@@ -868,6 +931,7 @@ static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+ .get_gfx_shadow_info = &gfx_v12_0_get_gfx_shadow_info,
};
static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
@@ -1319,20 +1383,23 @@ static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev)
}
}
-static int gfx_v12_0_sw_init(void *handle)
+static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id = 0;
unsigned num_compute_rings;
int xcc_id = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
+
+ INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
adev->gfx.me.num_me = 1;
adev->gfx.me.num_pipe_per_me = 1;
- adev->gfx.me.num_queue_per_pipe = 1;
- adev->gfx.mec.num_mec = 2;
+ adev->gfx.me.num_queue_per_pipe = 8;
+ adev->gfx.mec.num_mec = 1;
adev->gfx.mec.num_pipe_per_mec = 2;
adev->gfx.mec.num_queue_per_pipe = 4;
break;
@@ -1346,36 +1413,68 @@ static int gfx_v12_0_sw_init(void *handle)
break;
}
- /* recalculate compute rings to use based on hardware configuration */
- num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
- adev->gfx.mec.num_queue_per_pipe) / 2;
- adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
- num_compute_rings);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (!adev->gfx.disable_uq &&
+ adev->gfx.me_fw_version >= 2780 &&
+ adev->gfx.pfp_fw_version >= 2840 &&
+ adev->gfx.mec_fw_version >= 3050 &&
+ adev->mes.fw_version[0] >= 123) {
+ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+ adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if (adev->gfx.me_fw_version >= 2480 &&
+ adev->gfx.pfp_fw_version >= 2530 &&
+ adev->gfx.mec_fw_version >= 2680 &&
+ adev->mes.fw_version[0] >= 100)
+ adev->gfx.enable_cleaner_shader = true;
+ break;
+ default:
+ adev->gfx.enable_cleaner_shader = false;
+ break;
+ }
+
+ if (adev->gfx.num_compute_rings) {
+ /* recalculate compute rings to use based on hardware configuration */
+ num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe) / 2;
+ adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
+ num_compute_rings);
+ }
/* EOP Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
+ GFX_12_0_0__SRCID__CP_EOP_INTERRUPT,
&adev->gfx.eop_irq);
if (r)
return r;
/* Bad opcode Event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
+ GFX_12_0_0__SRCID__CP_BAD_OPCODE_ERROR,
&adev->gfx.bad_op_irq);
if (r)
return r;
/* Privileged reg */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
+ GFX_12_0_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
+ GFX_12_0_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -1396,41 +1495,59 @@ static int gfx_v12_0_sw_init(void *handle)
return r;
}
- /* set up the gfx ring */
- for (i = 0; i < adev->gfx.me.num_me; i++) {
- for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
- continue;
-
- r = gfx_v12_0_gfx_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
- ring_id++;
+ if (adev->gfx.num_gfx_rings) {
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v12_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
}
}
}
- ring_id = 0;
- /* set up the compute queues - allocate horizontally across pipes */
- for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
- for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
- for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- if (!amdgpu_gfx_is_mec_queue_enabled(adev,
- 0, i, k, j))
- continue;
+ if (adev->gfx.num_compute_rings) {
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev,
+ 0, i, k, j))
+ continue;
- r = gfx_v12_0_compute_ring_init(adev, ring_id,
- i, k, j);
- if (r)
- return r;
+ r = gfx_v12_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
- ring_id++;
+ ring_id++;
+ }
}
}
}
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ if ((adev->gfx.me_fw_version >= 2660) &&
+ (adev->gfx.mec_fw_version >= 2920)) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ }
+ }
+
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0);
if (r) {
@@ -1460,6 +1577,10 @@ static int gfx_v12_0_sw_init(void *handle)
gfx_v12_0_alloc_ip_dump(adev);
+ r = amdgpu_gfx_sysfs_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -1492,10 +1613,10 @@ static void gfx_v12_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}
-static int gfx_v12_0_sw_fini(void *handle)
+static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -1519,6 +1640,8 @@ static int gfx_v12_0_sw_fini(void *handle)
gfx_v12_0_free_microcode(adev);
+ amdgpu_gfx_sysfs_fini(adev);
+
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
kfree(adev->gfx.ip_dump_gfx_queues);
@@ -1592,6 +1715,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
{
+ u32 rb_bitmap_per_sa;
u32 rb_bitmap_width_per_sa;
u32 max_sa;
u32 active_sa_bitmap;
@@ -1609,12 +1733,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev)
adev->gfx.config.max_sh_per_se;
rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
adev->gfx.config.max_sh_per_se;
+ rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
+
for (i = 0; i < max_sa; i++) {
if (active_sa_bitmap & (1 << i))
- active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
+ active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -2264,7 +2390,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@@ -2395,7 +2521,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
(void **)&adev->gfx.me.me_fw_data_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
- gfx_v12_0_pfp_fini(adev);
+ gfx_v12_0_me_fini(adev);
return r;
}
@@ -2408,7 +2534,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@@ -2577,7 +2703,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
- u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
@@ -2601,7 +2726,7 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
@@ -2632,12 +2757,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v12_0_cp_gfx_start(adev);
-
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-
return 0;
}
@@ -2814,9 +2933,7 @@ static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev)
@@ -2851,25 +2968,25 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set up mqd control */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
+ tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
mqd->cp_gfx_mqd_control = tmp;
/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
+ tmp = regCP_GFX_HQD_VMID_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
mqd->cp_gfx_hqd_queue_priority = tmp;
/* set up time quantum */
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
+ tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
mqd->cp_gfx_hqd_quantum = tmp;
@@ -2891,16 +3008,18 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
- tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
+ tmp = regCP_GFX_HQD_CNTL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
mqd->cp_gfx_hqd_cntl = tmp;
/* set up cp_doorbell_control */
- tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
+ tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -2912,11 +3031,19 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_rb_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
+ mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
/* active the queue */
mqd->cp_gfx_hqd_active = 1;
+ /* set gfx UQ items */
+ mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+ mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+ mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+ mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -2950,41 +3077,19 @@ static int gfx_v12_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
- int r, i;
- struct amdgpu_ring *ring;
+ int i, r;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v12_0_kgq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v12_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
if (r)
- goto done;
+ return r;
}
r = amdgpu_gfx_enable_kgq(adev, 0);
if (r)
- goto done;
-
- r = gfx_v12_0_cp_gfx_start(adev);
- if (r)
- goto done;
+ return r;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- ring->sched.ready = true;
- }
-done:
- return r;
+ return gfx_v12_0_cp_gfx_start(adev);
}
static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
@@ -3007,14 +3112,14 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
+ tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
(order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
mqd->cp_hqd_eop_control = tmp;
/* enable doorbell? */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
if (prop->use_doorbell) {
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
@@ -3043,7 +3148,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
/* set MQD vmid to 0 */
- tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
+ tmp = regCP_MQD_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
mqd->cp_mqd_control = tmp;
@@ -3053,7 +3158,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
+ tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
(order_base_2(prop->queue_size / 4) - 1));
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
@@ -3062,6 +3167,8 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ if (prop->tmz_queue)
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
mqd->cp_hqd_pq_control = tmp;
/* set the wb address whether it's enabled or not */
@@ -3078,7 +3185,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
tmp = 0;
/* enable the doorbell if requested */
if (prop->use_doorbell) {
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_OFFSET, prop->doorbell_index);
@@ -3093,17 +3200,17 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_pq_doorbell_control = tmp;
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
+ mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
/* set the vmid for the queue */
mqd->cp_hqd_vmid = 0;
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
+ tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
mqd->cp_hqd_persistent_state = tmp;
/* set MIN_IB_AVAIL_SIZE */
- tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
+ tmp = regCP_HQD_IB_CONTROL_DEFAULT;
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
@@ -3113,6 +3220,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->cp_hqd_active = prop->hqd_active;
+ /* set UQ fenceaddress */
+ mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+ mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -3297,57 +3408,25 @@ static int gfx_v12_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
static int gfx_v12_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v12_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- ring->sched.ready = true;
+ gfx_v12_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
+ adev->gfx.kiq[0].ring.sched.ready = true;
return 0;
}
static int gfx_v12_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
if (!amdgpu_async_gfx_ring)
gfx_v12_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v12_0_kcq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v12_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v12_0_cp_resume(struct amdgpu_device *adev)
@@ -3429,7 +3508,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
@@ -3513,10 +3592,10 @@ static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev)
}
}
-static int gfx_v12_0_hw_init(void *handle)
+static int gfx_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
@@ -3603,14 +3682,60 @@ static int gfx_v12_0_hw_init(void *handle)
return r;
}
-static int gfx_v12_0_hw_fini(void *handle)
+static int gfx_v12_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
+ bool enable)
+{
+ unsigned int irq_type;
+ int m, p, r;
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
+ for (m = 0; m < adev->gfx.me.num_me; m++) {
+ for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
+ for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
+ for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + (m * adev->gfx.mec.num_pipe_per_mec)
+ + p;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t tmp;
+ cancel_delayed_work_sync(&adev->gfx.idle_work);
+
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
+ gfx_v12_0_set_userq_eop_interrupts(adev, false);
if (!adev->no_hw_access) {
if (amdgpu_async_gfx_ring) {
@@ -3643,19 +3768,19 @@ static int gfx_v12_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v12_0_suspend(void *handle)
+static int gfx_v12_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v12_0_hw_fini(handle);
+ return gfx_v12_0_hw_fini(ip_block);
}
-static int gfx_v12_0_resume(void *handle)
+static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v12_0_hw_init(handle);
+ return gfx_v12_0_hw_init(ip_block);
}
-static bool gfx_v12_0_is_idle(void *handle)
+static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -3664,11 +3789,11 @@ static bool gfx_v12_0_is_idle(void *handle)
return true;
}
-static int gfx_v12_0_wait_for_idle(void *handle)
+static int gfx_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -3695,15 +3820,37 @@ static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev)
return clock;
}
-static int gfx_v12_0_early_init(void *handle)
+static int gfx_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = true;
+ break;
+ case 1:
+ adev->gfx.disable_kq = false;
+ adev->gfx.disable_uq = false;
+ break;
+ case 2:
+ adev->gfx.disable_kq = true;
+ adev->gfx.disable_uq = false;
+ break;
+ }
adev->gfx.funcs = &gfx_v12_0_gfx_funcs;
- adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
- AMDGPU_MAX_COMPUTE_RINGS);
+ if (adev->gfx.disable_kq) {
+ adev->gfx.num_gfx_rings = 0;
+ adev->gfx.num_compute_rings = 0;
+ } else {
+ adev->gfx.num_gfx_rings = GFX12_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
+ }
gfx_v12_0_set_kiq_pm4_funcs(adev);
gfx_v12_0_set_ring_funcs(adev);
@@ -3717,9 +3864,9 @@ static int gfx_v12_0_early_init(void *handle)
return gfx_v12_0_init_microcode(adev);
}
-static int gfx_v12_0_late_init(void *handle)
+static int gfx_v12_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -3734,6 +3881,10 @@ static int gfx_v12_0_late_init(void *handle)
if (r)
return r;
+ r = gfx_v12_0_set_userq_eop_interrupts(adev, true);
+ if (r)
+ return r;
+
return 0;
}
@@ -3846,10 +3997,10 @@ static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
}
#endif
-static int gfx_v12_0_set_powergating_state(void *handle,
+static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -3981,17 +4132,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
if (def != data)
WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
-
- data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
- data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
- WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
-
- /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
- if (adev->sdma.num_instances > 1) {
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
- }
}
}
@@ -4097,15 +4237,15 @@ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v12_0_set_clockgating_state(void *handle,
+static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
- switch (adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
gfx_v12_0_update_gfx_clock_gating(adev,
@@ -4118,9 +4258,9 @@ static int gfx_v12_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
/* AMD_CG_SUPPORT_GFX_MGCG */
@@ -4184,45 +4324,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always being used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
- lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
- upper_32_bits(ring->wptr));
- }
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
}
}
@@ -4247,42 +4359,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
- uint64_t wptr_tmp;
-
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
- wptr_tmp = ring->wptr & ring->buf_mask;
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
- *wptr_saved = wptr_tmp;
- /* assume doorbell always used by mes mapped queue */
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- } else {
- WDOORBELL64(ring->doorbell_index, wptr_tmp);
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index, wptr_tmp);
- }
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
- } else {
- BUG(); /* only DOORBELL method supported on gfx12 now */
- }
+ BUG(); /* only DOORBELL method supported on gfx12 now */
}
}
@@ -4329,10 +4413,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x400000;
-
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4352,10 +4432,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
- if (ring->is_mes_queue)
- /* inherit vmid from mqd */
- control |= 0x40000000;
-
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@@ -4395,8 +4471,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
+ amdgpu_ring_write(ring, 0);
}
static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -4424,10 +4499,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- if (ring->is_mes_queue)
- gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
- else
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@@ -4761,25 +4833,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int i;
+ u32 doorbell_offset = entry->src_data[0];
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
- uint32_t mes_queue_id = entry->src_data[0];
+ int i;
DRM_DEBUG("IH: CP EOP\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
@@ -4946,27 +5016,29 @@ static void gfx_v12_0_handle_priv_fault(struct amdgpu_device *adev,
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
- switch (me_id) {
- case 0:
- for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
- ring = &adev->gfx.gfx_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
- }
- break;
- case 1:
- case 2:
- for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
- if (ring->me == me_id && ring->pipe == pipe_id &&
- ring->queue == queue_id)
- drm_sched_fault(&ring->sched);
+ if (!adev->gfx.disable_kq) {
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ break;
}
- break;
- default:
- BUG();
- break;
}
}
@@ -5022,8 +5094,6 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- int i;
-
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
@@ -5034,13 +5104,19 @@ static void gfx_v12_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
- for (i = 1; i < num_nop; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
}
-static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
+static void gfx_v12_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /* Emit the cleaner shader */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+}
+
+static void gfx_v12_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
@@ -5102,9 +5178,9 @@ static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
}
}
-static void gfx_v12_ip_dump(void *handle)
+static void gfx_v12_ip_dump(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
@@ -5168,6 +5244,69 @@ static void gfx_v12_ip_dump(void *handle)
amdgpu_gfx_off_ctrl(adev, true);
}
+static bool gfx_v12_pipe_reset_support(struct amdgpu_device *adev)
+{
+ /* Disable the pipe reset until the CPFW fully support it.*/
+ dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
+ return false;
+}
+
+static int gfx_v12_reset_gfx_pipe(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r;
+
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
+
+ gfx_v12_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
+ soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE0_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 1);
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ PFP_PIPE1_RESET, 0);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
+ ME_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
+
+ r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ soc24_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ gfx_v12_0_unset_safe_mode(adev, 0);
+
+ dev_info(adev->dev, "The ring %s pipe reset: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Sometimes the ME start pc counter can't cache correctly, so the
+ * PC check only as a reference and pipe reset result rely on the
+ * later ring test.
+ */
+ return 0;
+}
+
static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
@@ -5178,24 +5317,15 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
if (r) {
- dev_err(adev->dev, "reset via MES failed %d\n", r);
- return r;
+ dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
+ r = gfx_v12_reset_gfx_pipe(ring);
+ if (r)
+ return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v12_0_kgq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v12_0_kgq_init_queue(ring, true);
if (r) {
- DRM_ERROR("fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "failed to init kgq\n");
return r;
}
@@ -5208,42 +5338,108 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
-static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
+static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int r, i;
+ uint32_t reset_pipe = 0, clean_pipe = 0;
+ int r = 0;
- if (amdgpu_sriov_vf(adev))
- return -EINVAL;
+ if (!gfx_v12_pipe_reset_support(adev))
+ return -EOPNOTSUPP;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ gfx_v12_0_set_safe_mode(adev, 0);
mutex_lock(&adev->srbm_mutex);
soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
- WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
- for (i = 0; i < adev->usec_timeout; i++) {
- if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+
+ reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
+ clean_pipe = reset_pipe;
+
+ if (adev->gfx.rs64_enable) {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE0_RESET, 0);
break;
- udelay(1);
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE1_RESET, 0);
+ break;
+ case 2:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE2_RESET, 0);
+ break;
+ case 3:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
+ MEC_PIPE3_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
+ r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
+ RS64_FW_UC_START_ADDR_LO;
+ } else {
+ switch (ring->pipe) {
+ case 0:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE0_RESET, 0);
+ break;
+ case 1:
+ reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 1);
+ clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
+ MEC_ME1_PIPE1_RESET, 0);
+ break;
+ default:
+ break;
+ }
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
+ WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
+ /* Doesn't find the F32 MEC instruction pointer register, and suppose
+ * the driver won't run into the F32 mode.
+ */
}
+
soc24_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ gfx_v12_0_unset_safe_mode(adev, 0);
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)) {
- DRM_ERROR("fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v12_0_kcq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
+ dev_info(adev->dev, "The ring %s pipe resets: %s\n", ring->name,
+ r == 0 ? "successfully" : "failed");
+ /* Need the ring test to verify the pipe reset result.*/
+ return 0;
+}
+
+static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
+ if (r) {
+ dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
+ r = gfx_v12_0_reset_compute_pipe(ring);
+ if (r)
+ return r;
}
- amdgpu_bo_unreserve(ring->mqd_obj);
+
+ r = gfx_v12_0_kcq_init_queue(ring, true);
if (r) {
- DRM_ERROR("fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "failed to init kcq\n");
return r;
}
r = amdgpu_mes_map_legacy_queue(adev, ring);
@@ -5255,6 +5451,20 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
return amdgpu_ring_test_ring(ring);
}
+static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_begin_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+}
+
+static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring)
+{
+ amdgpu_gfx_profile_ring_end_use(ring);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
.name = "gfx_v12_0",
.early_init = gfx_v12_0_early_init,
@@ -5297,7 +5507,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
8 + 8 + /* FENCE x2 */
- 8, /* gfx_v12_0_emit_mem_sync */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
.emit_ib_size = 4, /* gfx_v12_0_ring_emit_ib_gfx */
.emit_ib = gfx_v12_0_ring_emit_ib_gfx,
.emit_fence = gfx_v12_0_ring_emit_fence,
@@ -5318,6 +5529,9 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
.soft_recovery = gfx_v12_0_ring_soft_recovery,
.emit_mem_sync = gfx_v12_0_emit_mem_sync,
.reset = gfx_v12_0_reset_kgq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
@@ -5336,7 +5550,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v12_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v12_0_ring_emit_fence x3 for user fence, vm fence */
- 8, /* gfx_v12_0_emit_mem_sync */
+ 8 + /* gfx_v12_0_emit_mem_sync */
+ 2, /* gfx_v12_0_ring_emit_cleaner_shader */
.emit_ib_size = 7, /* gfx_v12_0_ring_emit_ib_compute */
.emit_ib = gfx_v12_0_ring_emit_ib_compute,
.emit_fence = gfx_v12_0_ring_emit_fence,
@@ -5353,6 +5568,9 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
.soft_recovery = gfx_v12_0_ring_soft_recovery,
.emit_mem_sync = gfx_v12_0_emit_mem_sync,
.reset = gfx_v12_0_reset_kcq,
+ .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader,
+ .begin_use = gfx_v12_0_ring_begin_use,
+ .end_use = gfx_v12_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
index bcc9c72ccbde..f7184b2dc4e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h
@@ -26,4 +26,6 @@
extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 564f0b9336b6..70d7a1f434c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -28,19 +28,33 @@
#include "amdgpu_gfx.h"
#include "amdgpu_ucode.h"
#include "clearstate_si.h"
+#include "si.h"
+#include "sid.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
#include "gca/gfx_6_0_sh_mask.h"
+#include "gca/gfx_7_2_enum.h"
+
#include "gmc/gmc_6_0_d.h"
#include "gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
#include "dce/dce_6_0_sh_mask.h"
-#include "gca/gfx_7_2_enum.h"
+
#include "si_enums.h"
-#include "si.h"
+
+#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
+#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
+
+#define GFX6_NUM_GFX_RINGS 1
+#define GFX6_NUM_COMPUTE_RINGS 2
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -337,6 +351,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
@@ -345,6 +360,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
@@ -353,6 +369,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
@@ -361,6 +378,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1717,10 +1735,14 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
gfx_v6_0_get_cu_info(adev);
gfx_v6_0_config_init(adev);
- WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
- (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
- WREG32(mmCP_MEQ_THRESHOLDS, (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
- (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+ WREG32(mmCP_QUEUE_THRESHOLDS,
+ ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) |
+ (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT)));
+
+ /* set HW defaults for 3D engine */
+ WREG32(mmCP_MEQ_THRESHOLDS,
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
sx_debug_1 = RREG32(mmSX_DEBUG_1);
WREG32(mmSX_DEBUG_1, sx_debug_1);
@@ -1906,7 +1928,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -2836,44 +2858,21 @@ static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v6_0_init_pg(struct amdgpu_device *adev)
@@ -3023,9 +3022,9 @@ static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = {
.start = gfx_v6_0_rlc_start
};
-static int gfx_v6_0_early_init(void *handle)
+static int gfx_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS;
@@ -3039,10 +3038,10 @@ static int gfx_v6_0_early_init(void *handle)
return 0;
}
-static int gfx_v6_0_sw_init(void *handle)
+static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -3107,10 +3106,10 @@ static int gfx_v6_0_sw_init(void *handle)
return r;
}
-static int gfx_v6_0_sw_fini(void *handle)
+static int gfx_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -3122,10 +3121,10 @@ static int gfx_v6_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_hw_init(void *handle)
+static int gfx_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_constants_init(adev);
@@ -3142,9 +3141,9 @@ static int gfx_v6_0_hw_init(void *handle)
return r;
}
-static int gfx_v6_0_hw_fini(void *handle)
+static int gfx_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v6_0_cp_enable(adev, false);
adev->gfx.rlc.funcs->stop(adev);
@@ -3153,23 +3152,19 @@ static int gfx_v6_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v6_0_suspend(void *handle)
+static int gfx_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_fini(adev);
+ return gfx_v6_0_hw_fini(ip_block);
}
-static int gfx_v6_0_resume(void *handle)
+static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v6_0_hw_init(adev);
+ return gfx_v6_0_hw_init(ip_block);
}
-static bool gfx_v6_0_is_idle(void *handle)
+static bool gfx_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -3177,24 +3172,19 @@ static bool gfx_v6_0_is_idle(void *handle)
return true;
}
-static int gfx_v6_0_wait_for_idle(void *handle)
+static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v6_0_is_idle(handle))
+ if (gfx_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v6_0_soft_reset(void *handle)
-{
- return 0;
-}
-
static void gfx_v6_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -3382,11 +3372,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v6_0_set_clockgating_state(void *handle,
+static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -3404,11 +3394,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v6_0_set_powergating_state(void *handle,
+static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -3444,7 +3434,6 @@ static void gfx_v6_0_emit_mem_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.name = "gfx_v6_0",
.early_init = gfx_v6_0_early_init,
- .late_init = NULL,
.sw_init = gfx_v6_0_sw_init,
.sw_fini = gfx_v6_0_sw_fini,
.hw_init = gfx_v6_0_hw_init,
@@ -3453,11 +3442,8 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.resume = gfx_v6_0_resume,
.is_idle = gfx_v6_0_is_idle,
.wait_for_idle = gfx_v6_0_wait_for_idle,
- .soft_reset = gfx_v6_0_soft_reset,
.set_clockgating_state = gfx_v6_0_set_clockgating_state,
.set_powergating_state = gfx_v6_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index f146806c4633..da0534ff1271 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -55,6 +55,9 @@
#define GFX7_NUM_GFX_RINGS 1
#define GFX7_MEC_HPD_SIZE 2048
+#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
+#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
+
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
@@ -934,33 +937,39 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
}
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
if (err)
goto out;
}
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
out:
if (err) {
@@ -2324,7 +2333,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
error:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
@@ -2559,7 +2568,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2876,7 +2885,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
@@ -3876,67 +3885,22 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
- switch (adev->asic_type) {
- case CHIP_BONAIRE:
- buffer[count++] = cpu_to_le32(0x16000012);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KAVERI:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- case CHIP_HAWAII:
- buffer[count++] = cpu_to_le32(0x3a00161a);
- buffer[count++] = cpu_to_le32(0x0000002e);
- break;
- default:
- buffer[count++] = cpu_to_le32(0x00000000);
- buffer[count++] = cpu_to_le32(0x00000000);
- break;
- }
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
@@ -4134,9 +4098,9 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.update_spm_vmid = gfx_v7_0_update_spm_vmid
};
-static int gfx_v7_0_early_init(void *handle)
+static int gfx_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
@@ -4151,9 +4115,9 @@ static int gfx_v7_0_early_init(void *handle)
return 0;
}
-static int gfx_v7_0_late_init(void *handle)
+static int gfx_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4343,10 +4307,10 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
-static int gfx_v7_0_sw_init(void *handle)
+static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j, k, r, ring_id;
switch (adev->asic_type) {
@@ -4439,9 +4403,9 @@ static int gfx_v7_0_sw_init(void *handle)
return r;
}
-static int gfx_v7_0_sw_fini(void *handle)
+static int gfx_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4465,10 +4429,10 @@ static int gfx_v7_0_sw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_hw_init(void *handle)
+static int gfx_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v7_0_constants_init(adev);
@@ -4486,9 +4450,9 @@ static int gfx_v7_0_hw_init(void *handle)
return r;
}
-static int gfx_v7_0_hw_fini(void *handle)
+static int gfx_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4499,23 +4463,19 @@ static int gfx_v7_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v7_0_suspend(void *handle)
+static int gfx_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_fini(adev);
+ return gfx_v7_0_hw_fini(ip_block);
}
-static int gfx_v7_0_resume(void *handle)
+static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gfx_v7_0_hw_init(adev);
+ return gfx_v7_0_hw_init(ip_block);
}
-static bool gfx_v7_0_is_idle(void *handle)
+static bool gfx_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK)
return false;
@@ -4523,11 +4483,11 @@ static bool gfx_v7_0_is_idle(void *handle)
return true;
}
-static int gfx_v7_0_wait_for_idle(void *handle)
+static int gfx_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -4540,11 +4500,11 @@ static int gfx_v7_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v7_0_soft_reset(void *handle)
+static int gfx_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32(mmGRBM_STATUS);
@@ -4850,11 +4810,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v7_0_set_clockgating_state(void *handle,
+static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -4873,11 +4833,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gfx_v7_0_set_powergating_state(void *handle,
+static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
gate = true;
@@ -5009,8 +4969,6 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.soft_reset = gfx_v7_0_soft_reset,
.set_clockgating_state = gfx_v7_0_set_clockgating_state,
.set_powergating_state = gfx_v7_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index bc8295812cc8..5ee2237d8ee8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -982,13 +982,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_pfp_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
}
if (err)
@@ -999,13 +1002,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_me_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
}
if (err)
@@ -1017,13 +1023,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_ce_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
}
if (err)
@@ -1044,6 +1053,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
adev->virt.chained_ib_support = false;
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1093,13 +1103,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_mec_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
}
if (err)
@@ -1112,13 +1125,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
(adev->asic_type != CHIP_TOPAZ)) {
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_OPTIONAL,
"amdgpu/%s_mec2_2.bin", chip_name);
if (err == -ENODEV) {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
}
} else {
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
}
if (!err) {
@@ -1207,48 +1223,22 @@ out:
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
- buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
- PACKET3_SET_CONTEXT_REG_START);
+ buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
@@ -1640,7 +1630,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
@@ -1894,12 +1884,12 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
-static int gfx_v8_0_sw_init(void *handle)
+static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (adev->asic_type) {
case CHIP_TONGA:
@@ -2037,9 +2027,9 @@ static int gfx_v8_0_sw_init(void *handle)
return 0;
}
-static int gfx_v8_0_sw_fini(void *handle)
+static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
@@ -4260,7 +4250,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
ring->wptr = 0;
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -4304,9 +4294,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32(mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32(mmRLC_CP_SCHEDULERS, tmp);
+ WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -4669,60 +4657,25 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v8_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (!r) {
- r = gfx_v8_0_kcq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
if (r)
- goto done;
+ return r;
}
gfx_v8_0_set_mec_doorbell_range(adev);
- r = gfx_v8_0_kiq_kcq_enable(adev);
- if (r)
- goto done;
-
-done:
- return r;
+ return gfx_v8_0_kiq_kcq_enable(adev);
}
static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
@@ -4783,10 +4736,10 @@ static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v8_0_cp_compute_enable(adev, enable);
}
-static int gfx_v8_0_hw_init(void *handle)
+static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gfx_v8_0_init_golden_registers(adev);
gfx_v8_0_constants_init(adev);
@@ -4823,6 +4776,13 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, 0);
amdgpu_ring_write(kiq_ring, 0);
}
+ /* Submit unmap queue packet */
+ amdgpu_ring_commit(kiq_ring);
+ /*
+ * Ring test will do a basic scratch register change check. Just run
+ * this to ensure that unmap queues that is submitted before got
+ * processed successfully before returning.
+ */
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ disable failed\n");
@@ -4830,9 +4790,9 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
return r;
}
-static bool gfx_v8_0_is_idle(void *handle)
+static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
|| RREG32(mmGRBM_STATUS2) != 0x8)
@@ -4865,13 +4825,13 @@ static int gfx_v8_0_wait_for_rlc_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_wait_for_idle(void *handle)
+static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v8_0_is_idle(handle))
+ if (gfx_v8_0_is_idle(ip_block))
return 0;
udelay(1);
@@ -4879,9 +4839,9 @@ static int gfx_v8_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int gfx_v8_0_hw_fini(void *handle)
+static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4897,8 +4857,9 @@ static int gfx_v8_0_hw_fini(void *handle)
pr_debug("For SRIOV client, shouldn't do anything.\n");
return 0;
}
+
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- if (!gfx_v8_0_wait_for_idle(adev))
+ if (!gfx_v8_0_wait_for_idle(ip_block))
gfx_v8_0_cp_enable(adev, false);
else
pr_err("cp is busy, skip halt cp\n");
@@ -4911,19 +4872,19 @@ static int gfx_v8_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v8_0_suspend(void *handle)
+static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_fini(handle);
+ return gfx_v8_0_hw_fini(ip_block);
}
-static int gfx_v8_0_resume(void *handle)
+static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v8_0_hw_init(handle);
+ return gfx_v8_0_hw_init(ip_block);
}
-static bool gfx_v8_0_check_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -4983,9 +4944,9 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
}
}
-static int gfx_v8_0_pre_soft_reset(void *handle)
+static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5024,9 +4985,9 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_soft_reset(void *handle)
+static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
u32 tmp;
@@ -5086,9 +5047,9 @@ static int gfx_v8_0_soft_reset(void *handle)
return 0;
}
-static int gfx_v8_0_post_soft_reset(void *handle)
+static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 grbm_soft_reset = 0;
if ((!adev->gfx.grbm_soft_reset) &&
@@ -5254,9 +5215,9 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
-static int gfx_v8_0_early_init(void *handle)
+static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.xcc_mask = 1;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
@@ -5271,9 +5232,9 @@ static int gfx_v8_0_early_init(void *handle)
return 0;
}
-static int gfx_v8_0_late_init(void *handle)
+static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -5313,7 +5274,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade
(adev->asic_type == CHIP_POLARIS12) ||
(adev->asic_type == CHIP_VEGAM))
/* Send msg to SMU via Powerplay */
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5359,10 +5320,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
}
}
-static int gfx_v8_0_set_powergating_state(void *handle,
+static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -5430,9 +5391,9 @@ static int gfx_v8_0_set_powergating_state(void *handle,
return 0;
}
-static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5617,8 +5578,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t temp, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
@@ -5712,8 +5671,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
gfx_v8_0_wait_for_rlc_serdes(adev);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5723,8 +5680,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
@@ -5805,12 +5760,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
}
gfx_v8_0_wait_for_rlc_serdes(adev);
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
* === MGCG + MGLS + TS(CG/LS) ===
@@ -5824,6 +5779,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
}
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5974,10 +5931,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_set_clockgating_state(void *handle,
+static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -6947,8 +6904,6 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.set_clockgating_state = gfx_v8_0_set_clockgating_state,
.set_powergating_state = gfx_v8_0_set_powergating_state,
.get_clockgating_state = gfx_v8_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 23f0573ae47b..ad9be3656653 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
- /* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
- SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
+ /* packet headers */
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
};
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
@@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
};
enum ta_ras_gfx_subblock {
@@ -1243,7 +1270,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -1269,6 +1296,7 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.mec_fw_write_wait = false;
if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
((adev->gfx.mec_fw_version < 0x000001a5) ||
(adev->gfx.mec_feature_version < 46) ||
(adev->gfx.pfp_fw_version < 0x000000b7) ||
@@ -1429,18 +1457,21 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
int err;
err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_pfp.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_me.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_ce.bin", chip_name);
if (err)
goto out;
@@ -1476,6 +1507,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc_am4.bin", chip_name);
else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
(smu_version >= 0x41e2b))
@@ -1483,9 +1515,11 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
*/
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_kicker_rlc.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -1518,9 +1552,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
- "amdgpu/%s_sjt_mec.bin", chip_name);
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
@@ -1531,9 +1567,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sjt_mec2.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_mec2.bin", chip_name);
if (!err) {
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
@@ -1613,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
volatile u32 *buffer)
{
- u32 count = 0, i;
- const struct cs_section_def *sect = NULL;
- const struct cs_extent_def *ext = NULL;
+ u32 count = 0;
if (adev->gfx.rlc.cs_data == NULL)
return;
if (buffer == NULL)
return;
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- buffer[count++] = cpu_to_le32(0x80000000);
- buffer[count++] = cpu_to_le32(0x80000000);
-
- for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
- for (ext = sect->section; ext->extent != NULL; ++ext) {
- if (sect->id == SECT_CONTEXT) {
- buffer[count++] =
- cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
- buffer[count++] = cpu_to_le32(ext->reg_index -
- PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- buffer[count++] = cpu_to_le32(ext->extent[i]);
- } else {
- return;
- }
- }
- }
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
-
- buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
- buffer[count++] = cpu_to_le32(0);
+ count = amdgpu_gfx_csb_preamble_start(buffer);
+ count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
+ amdgpu_gfx_csb_preamble_end(buffer, count);
}
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
@@ -2198,12 +2210,12 @@ static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
}
}
-static int gfx_v9_0_sw_init(void *handle)
+static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id;
int xcc_id = 0;
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned int hw_prio;
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
@@ -2223,6 +2235,37 @@ static int gfx_v9_0_sw_init(void *handle)
}
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 0, 1):
+ case IP_VERSION(9, 2, 1):
+ case IP_VERSION(9, 4, 0):
+ case IP_VERSION(9, 2, 2):
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 3, 0):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.me_fw_version >= 167 &&
+ adev->gfx.pfp_fw_version >= 196 &&
+ adev->gfx.mec_fw_version >= 474) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
+ case IP_VERSION(9, 4, 2):
+ adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
+ adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
+ if (adev->gfx.mec_fw_version >= 88) {
+ adev->gfx.enable_cleaner_shader = true;
+ r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+ if (r) {
+ adev->gfx.enable_cleaner_shader = false;
+ dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+ }
+ }
+ break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
@@ -2362,6 +2405,12 @@ static int gfx_v9_0_sw_init(void *handle)
}
}
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->gfx.gfx_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+
r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
@@ -2390,7 +2439,7 @@ static int gfx_v9_0_sw_init(void *handle)
gfx_v9_0_alloc_ip_dump(adev);
- r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ r = amdgpu_gfx_sysfs_init(adev);
if (r)
return r;
@@ -2398,10 +2447,10 @@ static int gfx_v9_0_sw_init(void *handle)
}
-static int gfx_v9_0_sw_fini(void *handle)
+static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
@@ -2418,6 +2467,8 @@ static int gfx_v9_0_sw_fini(void *handle)
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
amdgpu_gfx_kiq_fini(adev, 0);
+ amdgpu_gfx_cleaner_shader_sw_fini(adev);
+
gfx_v9_0_mec_fini(adev);
amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
&adev->gfx.rlc.clear_state_gpu_addr,
@@ -2429,7 +2480,7 @@ static int gfx_v9_0_sw_fini(void *handle)
}
gfx_v9_0_free_microcode(adev);
- amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_fini(adev);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
@@ -2607,7 +2658,10 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ if (!amdgpu_sriov_vf(adev) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) {
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ }
gfx_v9_0_tiling_mode_table_init(adev);
@@ -3184,6 +3238,15 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
@@ -3265,8 +3328,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
* confirmed that the APU gfx10/gfx11 needn't such update.
*/
if (adev->flags & AMD_IS_APU &&
- adev->in_s3 && !adev->suspend_complete) {
- DRM_INFO(" Will skip the CSB packet resubmit\n");
+ adev->in_s3 && !pm_resume_via_firmware()) {
+ DRM_INFO("Will skip the CSB packet resubmit\n");
return 0;
}
r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
@@ -3346,7 +3409,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
- /* set the wb address wether it's enabled or not */
+ /* set the wb address whether it's enabled or not */
rptr_addr = ring->rptr_gpu_addr;
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
@@ -3393,7 +3456,15 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
- (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
+ CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
adev->gfx.kiq[0].ring.sched.ready = false;
}
udelay(50);
@@ -3451,9 +3522,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -3842,55 +3911,23 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[0].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v9_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
return 0;
}
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ int i, r;
gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kcq_init_queue(ring, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, 0);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, 0);
}
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
@@ -3914,6 +3951,10 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ gfx_v9_0_cp_compute_enable(adev, false);
+
r = gfx_v9_0_kiq_resume(adev);
if (r)
return r;
@@ -3970,10 +4011,10 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
gfx_v9_0_cp_compute_enable(adev, enable);
}
-static int gfx_v9_0_hw_init(void *handle)
+static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
adev->gfx.cleaner_shader_ptr);
@@ -3993,15 +4034,16 @@ static int gfx_v9_0_hw_init(void *handle)
if (r)
return r;
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) &&
+ !amdgpu_sriov_vf(adev))
gfx_v9_4_2_set_power_brake_sequence(adev);
return r;
}
-static int gfx_v9_0_hw_fini(void *handle)
+static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
@@ -4051,19 +4093,19 @@ static int gfx_v9_0_hw_fini(void *handle)
return 0;
}
-static int gfx_v9_0_suspend(void *handle)
+static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_fini(handle);
+ return gfx_v9_0_hw_fini(ip_block);
}
-static int gfx_v9_0_resume(void *handle)
+static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_0_hw_init(handle);
+ return gfx_v9_0_hw_init(ip_block);
}
-static bool gfx_v9_0_is_idle(void *handle)
+static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
GRBM_STATUS, GUI_ACTIVE))
@@ -4072,24 +4114,24 @@ static bool gfx_v9_0_is_idle(void *handle)
return true;
}
-static int gfx_v9_0_wait_for_idle(void *handle)
+static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_0_is_idle(handle))
+ if (gfx_v9_0_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_0_soft_reset(void *handle)
+static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
@@ -4739,15 +4781,15 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
}
fail:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
return r;
}
-static int gfx_v9_0_early_init(void *handle)
+static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
@@ -4771,9 +4813,9 @@ static int gfx_v9_0_early_init(void *handle)
return gfx_v9_0_init_microcode(adev);
}
-static int gfx_v9_0_ecc_late_init(void *handle)
+static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/*
@@ -4805,9 +4847,9 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
-static int gfx_v9_0_late_init(void *handle)
+static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -4822,7 +4864,7 @@ static int gfx_v9_0_late_init(void *handle)
if (r)
return r;
- r = gfx_v9_0_ecc_late_init(handle);
+ r = gfx_v9_0_ecc_late_init(ip_block);
if (r)
return r;
@@ -4915,8 +4957,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t data, def;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
@@ -4981,8 +5021,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
}
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
@@ -4993,8 +5031,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (!adev->gfx.num_gfx_rings)
return;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
/* Enable 3D CGCG/CGLS */
if (enable) {
/* write cmd to clear cgcg/cgls ov */
@@ -5036,8 +5072,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
@@ -5045,8 +5079,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
{
uint32_t def, data;
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
-
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
/* unset CGCG override */
@@ -5088,13 +5120,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
}
-
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
bool enable)
{
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
if (enable) {
/* CGCG/CGLS should be enabled after MGCG/MGLS
* === MGCG + MGLS ===
@@ -5114,6 +5145,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === MGCG + MGLS === */
gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
}
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return 0;
}
@@ -5191,10 +5223,10 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
-static int gfx_v9_0_set_powergating_state(void *handle,
+static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
@@ -5202,7 +5234,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 3, 0):
if (!enable)
- amdgpu_gfx_off_ctrl(adev, false);
+ amdgpu_gfx_off_ctrl_immediate(adev, false);
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
@@ -5224,10 +5256,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
if (enable)
- amdgpu_gfx_off_ctrl(adev, true);
+ amdgpu_gfx_off_ctrl_immediate(adev, true);
break;
case IP_VERSION(9, 2, 1):
- amdgpu_gfx_off_ctrl(adev, enable);
+ amdgpu_gfx_off_ctrl_immediate(adev, enable);
break;
default:
break;
@@ -5236,10 +5268,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
return 0;
}
-static int gfx_v9_0_set_clockgating_state(void *handle,
+static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -5262,9 +5294,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -5429,16 +5461,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
payload_size = sizeof(struct v9_ce_ib_state);
- if (ring->is_mes_queue) {
- payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
- } else {
- payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
- }
+ payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
@@ -5461,16 +5485,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
payload_size = sizeof(struct v9_de_ib_state);
- if (ring->is_mes_queue) {
- payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
- } else {
- payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
- }
+ payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
IB_COMPLETION_STATUS_PREEMPTED;
@@ -5660,19 +5676,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ce_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v9_gfx_meta_data, ce_payload);
- ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- }
+ offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+ ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -5758,28 +5764,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo
void *de_payload_cpu_addr;
int cnt;
- if (ring->is_mes_queue) {
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gfx_meta_data) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_gpu_addr =
- amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- de_payload_cpu_addr =
- amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = offsetof(struct amdgpu_mes_ctx_meta_data,
- gfx[0].gds_backup) +
- offsetof(struct v9_gfx_meta_data, de_payload);
- gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- } else {
- offset = offsetof(struct v9_gfx_meta_data, de_payload);
- de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ offset = offsetof(struct v9_gfx_meta_data, de_payload);
+ de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
- gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
- AMDGPU_CSA_SIZE - adev->gds.gds_size,
- PAGE_SIZE);
- }
+ gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+ AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
if (usegds) {
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
@@ -7167,8 +7158,6 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- int i;
-
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
@@ -7179,8 +7168,7 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
- for (i = 1; i < num_nop; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
}
static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
@@ -7237,10 +7225,6 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int i, r;
- if (!adev->debug_exp_resets &&
- !adev->gfx.num_gfx_rings)
- return -EINVAL;
-
if (amdgpu_sriov_vf(adev))
return -EINVAL;
@@ -7283,20 +7267,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)){
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kcq_init_queue(ring, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_0_kcq_init_queue(ring, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "fail to init kcq\n");
return r;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -7316,9 +7289,9 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
return amdgpu_ring_test_ring(ring);
}
-static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
+static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
@@ -7345,9 +7318,14 @@ static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
- drm_printf(p, "%-50s \t 0x%08x\n",
- gc_cp_reg_list_9[reg].reg_name,
- adev->gfx.ip_dump_compute_queues[index + reg]);
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ "mmCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues[index + reg]);
+ else
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
@@ -7356,9 +7334,9 @@ static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
}
-static void gfx_v9_ip_dump(void *handle)
+static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
@@ -7384,9 +7362,13 @@ static void gfx_v9_ip_dump(void *handle)
soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
for (reg = 0; reg < reg_count; reg++) {
- adev->gfx.ip_dump_compute_queues[index + reg] =
- RREG32(SOC15_REG_ENTRY_OFFSET(
- gc_cp_reg_list_9[reg]));
+ if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues[index + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET(
+ gc_cp_reg_list_9[reg]));
}
index += reg_count;
}
@@ -7400,11 +7382,49 @@ static void gfx_v9_ip_dump(void *handle)
static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
+ struct amdgpu_device *adev = ring->adev;
+
/* Emit the cleaner shader */
- amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
+ else
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0));
+
amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
}
+static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
+}
+
+static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ip_block *gfx_block =
+ amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+
+ /* Raven and PCO APUs seem to have stability issues
+ * with compute and gfxoff and gfx pg. Disable gfx pg during
+ * submission and allow again afterwards.
+ */
+ if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
+ gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
+
+ amdgpu_gfx_enforce_isolation_ring_end_use(ring);
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -7581,8 +7601,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
.reset = gfx_v9_0_reset_kcq,
.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
- .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
- .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
+ .begin_use = gfx_v9_0_ring_begin_use_compute,
+ .end_use = gfx_v9_0_ring_end_use_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
index 36c0292b5110..0b6bd09b7529 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0_cleaner_shader.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2024 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -24,3 +24,45 @@
static const u32 __maybe_unused gfx_9_0_cleaner_shader_hex[] = {
/* Add the cleaner shader code here */
};
+
+/* Define the cleaner shader gfx_9_4_2 */
+static const u32 gfx_9_4_2_cleaner_shader_hex[] = {
+ 0xbf068100, 0xbf84003b,
+ 0xbf8a0000, 0xb07c0000,
+ 0xbe8200ff, 0x00000078,
+ 0xbf110802, 0x7e000280,
+ 0x7e020280, 0x7e040280,
+ 0x7e060280, 0x7e080280,
+ 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x80828802,
+ 0xbe803202, 0xbf84fff5,
+ 0xbf9c0000, 0xbe8200ff,
+ 0x80000000, 0x86020102,
+ 0xbf840011, 0xbefe00c1,
+ 0xbeff00c1, 0xd28c0001,
+ 0x0001007f, 0xd28d0001,
+ 0x0002027e, 0x10020288,
+ 0xbe8200bf, 0xbefc00c1,
+ 0xd89c2000, 0x00020201,
+ 0xd89c6040, 0x00040401,
+ 0x320202ff, 0x00000400,
+ 0x80828102, 0xbf84fff8,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea0180,
+ 0xbeec0180, 0xbeee0180,
+ 0xbef00180, 0xbef20180,
+ 0xbef40180, 0xbef60180,
+ 0xbef80180, 0xbefa0180,
+ 0xbf810000, 0xbf8d0001,
+ 0xbefc00ff, 0x0000005c,
+ 0xbf800000, 0xbe802c80,
+ 0xbe812c80, 0xbe822c80,
+ 0xbe832c80, 0x80fc847c,
+ 0xbf84fffa, 0xbee60080,
+ 0xbee70080, 0xbeea01ff,
+ 0x000000ee, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
index f53b379d8971..6028afd81690 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c
@@ -27,7 +27,6 @@
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
-#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"
#include "gc/gc_9_4_1_offset.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 3f4fd2f08163..c48cd47b531f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
if (r) {
dev_err(adev->dev, "ib submit failed (%d).\n", r);
- amdgpu_ib_free(adev, ib, NULL);
+ amdgpu_ib_free(ib, NULL);
}
return r;
}
@@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
}
disp2_failed:
- amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ amdgpu_ib_free(&disp_ibs[2], NULL);
dma_fence_put(fences[2]);
disp1_failed:
- amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ amdgpu_ib_free(&disp_ibs[1], NULL);
dma_fence_put(fences[1]);
disp0_failed:
- amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ amdgpu_ib_free(&disp_ibs[0], NULL);
dma_fence_put(fences[0]);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init SGPRS Failed\n");
@@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
}
disp_failed:
- amdgpu_ib_free(adev, &disp_ib, NULL);
+ amdgpu_ib_free(&disp_ib, NULL);
dma_fence_put(fence);
pro_end:
- amdgpu_ib_free(adev, &wb_ib, NULL);
+ amdgpu_ib_free(&wb_ib, NULL);
if (r)
dev_info(adev->dev, "Init VGPRS Failed\n");
@@ -1547,7 +1547,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
{
uint32_t bank, way, mem;
static const char * const vml2_way_str[] = { "BIGK", "4K" };
- static const char * const utcl2_rounter_str[] = { "VMC", "APT" };
+ static const char * const utcl2_router_str[] = { "VMC", "APT" };
mem = instance % blk->num_mem_blocks;
way = (instance / blk->num_mem_blocks) % blk->num_ways;
@@ -1568,7 +1568,7 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev,
dev_info(
adev->dev,
"GFX SubBlock UTCL2_ROUTER_IFIF%d_GROUP0_%s, SED %d, DED %d\n",
- bank, utcl2_rounter_str[mem], sec_cnt, ded_cnt);
+ bank, utcl2_router_str[mem], sec_cnt, ded_cnt);
break;
case ATC_L2_CACHE_2M:
dev_info(
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
new file mode 100644
index 000000000000..35b8cf9070bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2_cleaner_shader.asm
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
+//To turn this shader program on for complitaion change this to main and lower shader main to main_1
+
+// MI200 : Clear SGPRs, VGPRs and LDS
+// Uses two kernels launched separately:
+// 1. Clean VGPRs, LDS, and lower SGPRs
+// Launches one workgroup per CU, each workgroup with 4x wave64 per SIMD in the CU
+// Waves are "wave64" and have 128 VGPRs each, which uses all 512 VGPRs per SIMD
+// Waves in the workgroup share the 64KB of LDS
+// Each wave clears SGPRs 0 - 95. Because there are 4 waves/SIMD, this is physical SGPRs 0-383
+// Each wave clears 128 VGPRs, so all 512 in the SIMD
+// The first wave of the workgroup clears its 64KB of LDS
+// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
+// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.
+// 2. Clean remaining SGPRs
+// Launches a workgroup with 24 waves per workgroup, yielding 6 waves per SIMD in each CU
+// Waves are allocating 96 SGPRs
+// CP sets up SPI_RESOURCE_RESERVE_* registers to prevent these waves from allocating SGPRs 0-223.
+// As such, these 6 waves per SIMD are allocated physical SGPRs 224-799
+// Barriers do not work for >16 waves per workgroup, so we cannot start with S_BARRIER
+// Instead, the shader starts with an S_SETHALT 1. Once all waves are launched CP will send unhalt command
+// The shader then clears all SGPRs allocated to it, cleaning out physical SGPRs 224-799
+
+shader main
+ asic(MI200)
+ type(CS)
+ wave_size(64)
+// Note: original source code from SQ team
+
+// (theorhetical fastest = ~512clks vgpr + 1536 lds + ~128 sgpr = 2176 clks)
+
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_3
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s3 == 1)
+ S_BARRIER
+
+ s_movk_i32 m0, 0x0000
+ s_mov_b32 s2, 0x00000078 // Loop 128/8=16 times (loop unrolled for performance)
+ //
+ // CLEAR VGPRs
+ //
+ s_set_gpr_idx_on s2, 0x8 // enable Dest VGPR indexing
+label_0005:
+ v_mov_b32 v0, 0
+ v_mov_b32 v1, 0
+ v_mov_b32 v2, 0
+ v_mov_b32 v3, 0
+ v_mov_b32 v4, 0
+ v_mov_b32 v5, 0
+ v_mov_b32 v6, 0
+ v_mov_b32 v7, 0
+ s_sub_u32 s2, s2, 8
+ s_set_gpr_idx_idx s2
+ s_cbranch_scc0 label_0005
+ s_set_gpr_idx_off
+
+ //
+ //
+
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_clean_sgpr_1 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+ //
+ // CLEAR SGPRs
+ //
+label_clean_sgpr_1:
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0 //clear vcc
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+s_endpgm
+
+label_0023:
+
+ s_sethalt 1
+
+ s_mov_b32 m0, 0x0000005c // Loop 96/4=24 times (loop unrolled for performance)
+ s_nop 0
+label_sgpr_loop1:
+
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop1
+
+ //clear vcc, flat scratch
+ s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 vcc, 0xee //clear vcc
+
+s_endpgm
+end
+
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index c100845409f7..c233edf60569 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -43,8 +43,12 @@
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -52,10 +56,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
-#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */
-#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
-#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
-
#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */
#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */
#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */
@@ -105,9 +105,6 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9_4_3[] = {
SOC15_REG_ENTRY_STR(GC, 0, regRLC_SMU_SAFE_MODE),
SOC15_REG_ENTRY_STR(GC, 0, regRLC_INT_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regRLC_GPM_GENERAL_6),
- /* cp header registers */
- SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
- SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME2_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
@@ -154,6 +151,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9_4_3[] = {
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GFX_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+ SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
};
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
@@ -349,14 +354,7 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
GOLDEN_GB_ADDR_CONFIG);
- /* Golden settings applied by driver for ASIC with rev_id 0 */
- if (adev->rev_id == 0) {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
- REDUCE_FIFO_DEPTH_BY_2, 2);
- } else {
- WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
- SPARE, 0x1);
- }
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1);
}
}
@@ -499,7 +497,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err2:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err1:
amdgpu_device_wb_free(adev, index);
@@ -543,6 +541,7 @@ static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_rlc.bin", chip_name);
if (err)
goto out;
@@ -558,24 +557,24 @@ out:
return err;
}
-static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
-{
- return true;
-}
-
-static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
-{
- if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
-}
-
static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
const char *chip_name)
{
int err;
- err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
- "amdgpu/%s_mec.bin", chip_name);
+ if (amdgpu_sriov_vf(adev)) {
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_sjt_mec.bin", chip_name);
+
+ if (err)
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
+ } else
+ err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+ AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mec.bin", chip_name);
if (err)
goto out;
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
@@ -584,8 +583,6 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
- gfx_v9_4_3_check_if_need_gfxoff(adev);
-
out:
if (err)
amdgpu_ucode_release(&adev->gfx.mec_fw);
@@ -875,12 +872,13 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
switch (type) {
case ACA_SMU_TYPE_UE:
- ret = aca_error_cache_log_bank_error(handle, &info,
- ACA_ERROR_TYPE_UE, 1ULL);
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
break;
case ACA_SMU_TYPE_CE:
- ret = aca_error_cache_log_bank_error(handle, &info,
- ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0));
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
return -EINVAL;
@@ -921,27 +919,15 @@ static const struct aca_info gfx_v9_4_3_aca_info = {
static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
{
- u32 gb_addr_config;
-
adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
adev->gfx.ras = &gfx_v9_4_3_ras;
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(9, 4, 3):
- case IP_VERSION(9, 4, 4):
- adev->gfx.config.max_hw_contexts = 8;
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
- gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG);
- break;
- default:
- BUG();
- break;
- }
-
- adev->gfx.config.gb_addr_config = gb_addr_config;
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG;
adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
REG_GET_FIELD(
@@ -1049,10 +1035,10 @@ static void gfx_v9_4_3_alloc_ip_dump(struct amdgpu_device *adev)
}
}
-static int gfx_v9_4_3_sw_init(void *handle)
+static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block)
{
int i, j, k, r, ring_id, xcc_id, num_xcc;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
@@ -1157,6 +1143,25 @@ static int gfx_v9_4_3_sw_init(void *handle)
return r;
}
+ adev->gfx.compute_supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if (adev->gfx.mec_fw_version >= 155) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ case IP_VERSION(9, 5, 0):
+ if (adev->gfx.mec_fw_version >= 21) {
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE;
+ }
+ break;
+ default:
+ break;
+ }
r = gfx_v9_4_3_gpu_early_init(adev);
if (r)
return r;
@@ -1165,26 +1170,19 @@ static int gfx_v9_4_3_sw_init(void *handle)
if (r)
return r;
-
- if (!amdgpu_sriov_vf(adev)) {
- r = amdgpu_gfx_sysfs_init(adev);
- if (r)
- return r;
- }
-
- gfx_v9_4_3_alloc_ip_dump(adev);
-
- r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
+ r = amdgpu_gfx_sysfs_init(adev);
if (r)
return r;
+ gfx_v9_4_3_alloc_ip_dump(adev);
+
return 0;
}
-static int gfx_v9_4_3_sw_fini(void *handle)
+static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, num_xcc;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
@@ -1201,9 +1199,7 @@ static int gfx_v9_4_3_sw_fini(void *handle)
gfx_v9_4_3_mec_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
gfx_v9_4_3_free_microcode(adev);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_gfx_sysfs_fini(adev);
- amdgpu_gfx_sysfs_isolation_shader_fini(adev);
+ amdgpu_gfx_sysfs_fini(adev);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
@@ -1247,8 +1243,10 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
mutex_unlock(&adev->srbm_mutex);
- /* Initialize all compute VMIDs to have no GDS, GWS, or OA
- acccess. These should be enabled by FW for target VMIDs. */
+ /*
+ * Initialize all compute VMIDs to have no GDS, GWS, or OA
+ * access. These should be enabled by FW for target VMIDs.
+ */
for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
@@ -1275,6 +1273,22 @@ static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
}
}
+/* For ASICs that needs xnack chain and MEC version supports, set SG_CONFIG1
+ * DISABLE_XNACK_CHECK_IN_RETRY_DISABLE bit and inform KFD to set xnack_chain
+ * bit in SET_RESOURCES
+ */
+static void gfx_v9_4_3_xcc_init_sq(struct amdgpu_device *adev, int xcc_id)
+{
+ uint32_t data;
+
+ if (!(adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
+ return;
+
+ data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_CONFIG1);
+ data = REG_SET_FIELD(data, SQ_CONFIG1, DISABLE_XNACK_CHECK_IN_RETRY_DISABLE, 1);
+ WREG32_SOC15(GC, xcc_id, regSQ_CONFIG1, data);
+}
+
static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
int xcc_id)
{
@@ -1319,6 +1333,7 @@ static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+ gfx_v9_4_3_xcc_init_sq(adev, xcc_id);
}
static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
@@ -1331,6 +1346,20 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
adev->gfx.config.db_debug2 =
RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ /* ToDo: GC 9.4.4 */
+ case IP_VERSION(9, 4, 3):
+ if (adev->gfx.mec_fw_version >= 184)
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if (adev->gfx.mec_fw_version >= 23)
+ adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN;
+ break;
+ default:
+ break;
+ }
+
for (i = 0; i < num_xcc; i++)
gfx_v9_4_3_xcc_constants_init(adev, i);
}
@@ -1346,10 +1375,8 @@ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
{
/*
* Rlc save restore list is workable since v2_1.
- * And it's needed by gfxoff feature.
*/
- if (adev->gfx.rlc.is_rlc_v2_1)
- gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+ gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
}
static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
@@ -1773,9 +1800,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id)
tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
@@ -1836,7 +1861,7 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
DOORBELL_SOURCE, 0);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_HIT, 0);
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_multi_vf_mode(adev))
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
DOORBELL_MODE, 1);
} else {
@@ -2184,55 +2209,27 @@ static int gfx_v9_4_3_xcc_kcq_fini_register(struct amdgpu_device *adev, int xcc_
static int gfx_v9_4_3_xcc_kiq_resume(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_ring *ring;
- int r;
-
- ring = &adev->gfx.kiq[xcc_id].ring;
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- return r;
-
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (unlikely(r != 0)) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
-
- gfx_v9_4_3_xcc_kiq_init_queue(ring, xcc_id);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
+ gfx_v9_4_3_xcc_kiq_init_queue(&adev->gfx.kiq[xcc_id].ring, xcc_id);
return 0;
}
static int gfx_v9_4_3_xcc_kcq_resume(struct amdgpu_device *adev, int xcc_id)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
+ struct amdgpu_ring *ring;
+ int i, r;
gfx_v9_4_3_xcc_cp_compute_enable(adev, true, xcc_id);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
- ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
-
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0))
- goto done;
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ ring = &adev->gfx.compute_ring[i + xcc_id *
+ adev->gfx.num_compute_rings];
+
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, xcc_id, false);
if (r)
- goto done;
+ return r;
}
- r = amdgpu_gfx_enable_kcq(adev, xcc_id);
-done:
- return r;
+ return amdgpu_gfx_enable_kcq(adev, xcc_id);
}
static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
@@ -2343,10 +2340,10 @@ static void gfx_v9_4_3_xcc_fini(struct amdgpu_device *adev, int xcc_id)
gfx_v9_4_3_xcc_cp_compute_enable(adev, false, xcc_id);
}
-static int gfx_v9_4_3_hw_init(void *handle)
+static int gfx_v9_4_3_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
adev->gfx.cleaner_shader_ptr);
@@ -2367,9 +2364,9 @@ static int gfx_v9_4_3_hw_init(void *handle)
return r;
}
-static int gfx_v9_4_3_hw_fini(void *handle)
+static int gfx_v9_4_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
@@ -2384,19 +2381,19 @@ static int gfx_v9_4_3_hw_fini(void *handle)
return 0;
}
-static int gfx_v9_4_3_suspend(void *handle)
+static int gfx_v9_4_3_suspend(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_4_3_hw_fini(handle);
+ return gfx_v9_4_3_hw_fini(ip_block);
}
-static int gfx_v9_4_3_resume(void *handle)
+static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block)
{
- return gfx_v9_4_3_hw_init(handle);
+ return gfx_v9_4_3_hw_init(ip_block);
}
-static bool gfx_v9_4_3_is_idle(void *handle)
+static bool gfx_v9_4_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
@@ -2408,24 +2405,24 @@ static bool gfx_v9_4_3_is_idle(void *handle)
return true;
}
-static int gfx_v9_4_3_wait_for_idle(void *handle)
+static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gfx_v9_4_3_is_idle(handle))
+ if (gfx_v9_4_3_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int gfx_v9_4_3_soft_reset(void *handle)
+static int gfx_v9_4_3_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 grbm_soft_reset = 0;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, GET_INST(GC, 0), regGRBM_STATUS);
@@ -2509,9 +2506,9 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
-static int gfx_v9_4_3_early_init(void *handle)
+static int gfx_v9_4_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
AMDGPU_MAX_COMPUTE_RINGS);
@@ -2527,9 +2524,9 @@ static int gfx_v9_4_3_early_init(void *handle)
return gfx_v9_4_3_init_microcode(adev);
}
-static int gfx_v9_4_3_late_init(void *handle)
+static int gfx_v9_4_3_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
@@ -2758,38 +2755,32 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = {
.is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range,
};
-static int gfx_v9_4_3_set_powergating_state(void *handle,
+static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static int gfx_v9_4_3_set_clockgating_state(void *handle,
+static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, num_xcc;
if (amdgpu_sriov_vf(adev))
return 0;
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(9, 4, 3):
- case IP_VERSION(9, 4, 4):
- for (i = 0; i < num_xcc; i++)
- gfx_v9_4_3_xcc_update_gfx_clock_gating(
- adev, state == AMD_CG_STATE_GATE, i);
- break;
- default:
- break;
- }
+ for (i = 0; i < num_xcc; i++)
+ gfx_v9_4_3_xcc_update_gfx_clock_gating(
+ adev, state == AMD_CG_STATE_GATE, i);
+
return 0;
}
-static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags)
+static void gfx_v9_4_3_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -3056,9 +3047,6 @@ static void gfx_v9_4_3_ring_soft_recovery(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
uint32_t value = 0;
- if (!adev->debug_exp_resets)
- return;
-
value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
@@ -3501,9 +3489,7 @@ static int gfx_v9_4_3_unmap_done(struct amdgpu_device *adev, uint32_t me,
static bool gfx_v9_4_3_pipe_reset_support(struct amdgpu_device *adev)
{
- /*TODO: Need check gfx9.4.4 mec fw whether supports pipe reset as well.*/
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
- adev->gfx.mec_fw_version >= 0x0000009b)
+ if (!!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE))
return true;
else
dev_warn_once(adev->dev, "Please use the latest MEC version to see whether support pipe reset\n");
@@ -3574,9 +3560,6 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
unsigned long flags;
int r;
- if (!adev->debug_exp_resets)
- return -EINVAL;
-
if (amdgpu_sriov_vf(adev))
return -EINVAL;
@@ -3616,20 +3599,9 @@ pipe_reset:
return r;
}
- r = amdgpu_bo_reserve(ring->mqd_obj, false);
- if (unlikely(r != 0)){
- dev_err(adev->dev, "fail to resv mqd_obj\n");
- return r;
- }
- r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
- amdgpu_bo_unreserve(ring->mqd_obj);
+ r = gfx_v9_4_3_xcc_kcq_init_queue(ring, ring->xcc_id, true);
if (r) {
- dev_err(adev->dev, "fail to unresv mqd_obj\n");
+ dev_err(adev->dev, "fail to init kcq\n");
return r;
}
spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -4567,8 +4539,6 @@ static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev)
static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
- int i;
-
/* Header itself is a NOP packet */
if (num_nop == 1) {
amdgpu_ring_write(ring, ring->funcs->nop);
@@ -4579,13 +4549,12 @@ static void gfx_v9_4_3_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_no
amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
/* Header is at index 0, followed by num_nops - 1 NOP packet's */
- for (i = 1; i < num_nop; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ amdgpu_ring_insert_nop(ring, num_nop - 1);
}
-static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p)
+static void gfx_v9_4_3_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k;
uint32_t xcc_id, xcc_offset, inst_offset;
uint32_t num_xcc, reg, num_inst;
@@ -4629,12 +4598,21 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p)
"\nxcc:%d mec:%d, pipe:%d, queue:%d\n",
xcc_id, i, j, k);
for (reg = 0; reg < reg_count; reg++) {
- drm_printf(p,
- "%-50s \t 0x%08x\n",
- gc_cp_reg_list_9_4_3[reg].reg_name,
- adev->gfx.ip_dump_compute_queues
- [xcc_offset + inst_offset +
- reg]);
+ if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ drm_printf(p,
+ "%-50s \t 0x%08x\n",
+ "regCP_MEC_ME2_HEADER_DUMP",
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset + inst_offset +
+ reg]);
+ else
+ drm_printf(p,
+ "%-50s \t 0x%08x\n",
+ gc_cp_reg_list_9_4_3[reg].reg_name,
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset + inst_offset +
+ reg]);
}
inst_offset += reg_count;
}
@@ -4643,9 +4621,9 @@ static void gfx_v9_4_3_ip_print(void *handle, struct drm_printer *p)
}
}
-static void gfx_v9_4_3_ip_dump(void *handle)
+static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t i, j, k;
uint32_t num_xcc, reg, num_inst;
uint32_t xcc_id, xcc_offset, inst_offset;
@@ -4656,7 +4634,6 @@ static void gfx_v9_4_3_ip_dump(void *handle)
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
- amdgpu_gfx_off_ctrl(adev, false);
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
xcc_offset = xcc_id * reg_count;
for (i = 0; i < reg_count; i++)
@@ -4664,7 +4641,6 @@ static void gfx_v9_4_3_ip_dump(void *handle)
RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i],
GET_INST(GC, xcc_id)));
}
- amdgpu_gfx_off_ctrl(adev, true);
/* dump compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
@@ -4673,7 +4649,6 @@ static void gfx_v9_4_3_ip_dump(void *handle)
num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3);
- amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
xcc_offset = xcc_id * reg_count * num_inst;
@@ -4686,12 +4661,20 @@ static void gfx_v9_4_3_ip_dump(void *handle)
GET_INST(GC, xcc_id));
for (reg = 0; reg < reg_count; reg++) {
- adev->gfx.ip_dump_compute_queues
- [xcc_offset +
- inst_offset + reg] =
- RREG32(SOC15_REG_ENTRY_OFFSET_INST(
- gc_cp_reg_list_9_4_3[reg],
- GET_INST(GC, xcc_id)));
+ if (i && gc_cp_reg_list_9_4_3[reg].reg_offset ==
+ regCP_MEC_ME1_HEADER_DUMP)
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset +
+ inst_offset + reg] =
+ RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+ regCP_MEC_ME2_HEADER_DUMP));
+ else
+ adev->gfx.ip_dump_compute_queues
+ [xcc_offset +
+ inst_offset + reg] =
+ RREG32(SOC15_REG_ENTRY_OFFSET_INST(
+ gc_cp_reg_list_9_4_3[reg],
+ GET_INST(GC, xcc_id)));
}
inst_offset += reg_count;
}
@@ -4700,7 +4683,6 @@ static void gfx_v9_4_3_ip_dump(void *handle)
}
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- amdgpu_gfx_off_ctrl(adev, true);
}
static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
@@ -4859,32 +4841,13 @@ static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
{
- /* init asci gds info */
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(9, 4, 3):
- case IP_VERSION(9, 4, 4):
- /* 9.4.3 removed all the GDS internal memory,
- * only support GWS opcode in kernel, like barrier
- * semaphore.etc */
- adev->gds.gds_size = 0;
- break;
- default:
- adev->gds.gds_size = 0x10000;
- break;
- }
-
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
- case IP_VERSION(9, 4, 3):
- case IP_VERSION(9, 4, 4):
- /* deprecated for 9.4.3, no usage at all */
- adev->gds.gds_compute_max_wave_id = 0;
- break;
- default:
- /* this really depends on the chip */
- adev->gds.gds_compute_max_wave_id = 0x7ff;
- break;
- }
+ /* 9.4.3 variants removed all the GDS internal memory,
+ * only support GWS opcode in kernel, like barrier
+ * semaphore.etc */
+ /* init asic gds info */
+ adev->gds.gds_size = 0;
+ adev->gds.gds_compute_max_wave_id = 0;
adev->gds.gws_size = 64;
adev->gds.oa_size = 16;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 0e3ddea7b8e0..a7bfc9f41d0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -92,12 +92,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
- /* Program the AGP BAR */
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
-
if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) {
+ /* Program the AGP BAR */
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
/* Program the system aperture low logical page number. */
WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index ed8e130c7d19..cb25f7f0dfc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -313,6 +313,16 @@ gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev,
}
}
+static inline bool
+gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev)
+{
+ /*
+ * TODO: Check if this function is really needed, so far only 9.4.3
+ * variants use GFXHUB 1.2
+ */
+ return !!adev->aid_mask;
+}
+
static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
uint32_t xcc_mask)
{
@@ -355,7 +365,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
PAGE_TABLE_BLOCK_SIZE,
block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm.
- * On 9.4.2 and 9.4.3, XNACK can be enabled in
+ * On 9.4.3 variants, XNACK can be enabled in
* the SQ per-process.
* Retry faults need to be enabled for that to work.
*/
@@ -363,12 +373,8 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!adev->gmc.noretry ||
- amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 4));
+ gfxhub_v1_2_per_process_xnack_support(
+ adev));
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 17509f32f61a..deb95fab02df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -505,42 +505,6 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev)
hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs;
}
-static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev)
-{
- u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL);
- u32 max_region =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
- u32 max_num_physical_nodes = 0;
- u32 max_physical_node_id = 0;
-
- switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
- case IP_VERSION(4, 8, 0):
- max_num_physical_nodes = 4;
- max_physical_node_id = 3;
- break;
- default:
- return -EINVAL;
- }
-
- /* PF_MAX_REGION=0 means xgmi is disabled */
- if (max_region) {
- adev->gmc.xgmi.num_physical_nodes = max_region + 1;
- if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes)
- return -EINVAL;
-
- adev->gmc.xgmi.physical_node_id =
- REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
- if (adev->gmc.xgmi.physical_node_id > max_physical_node_id)
- return -EINVAL;
-
- adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
- RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE),
- GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
- }
-
- return 0;
-}
-
static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev)
{
int i;
@@ -696,7 +660,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = {
.gart_disable = gfxhub_v2_1_gart_disable,
.set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default,
.init = gfxhub_v2_1_init,
- .get_xgmi_info = gfxhub_v2_1_get_xgmi_info,
.utcl2_harvest = gfxhub_v2_1_utcl2_harvest,
.mode2_save_regs = gfxhub_v2_1_save_regs,
.mode2_restore_regs = gfxhub_v2_1_restore_regs,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9784a2892185..a3e2787501f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -175,7 +175,10 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
- if (!amdgpu_sriov_vf(adev))
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev,
status);
@@ -265,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@@ -425,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- /* MES fw manages IH_VMID_x_LUT updating */
- if (ring->is_mes_queue)
- return;
-
if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
else
@@ -630,9 +629,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
-static int gmc_v10_0_early_init(void *handle)
+static int gmc_v10_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_set_mmhub_funcs(adev);
gmc_v10_0_set_gfxhub_funcs(adev);
@@ -651,9 +650,9 @@ static int gmc_v10_0_early_init(void *handle)
return 0;
}
-static int gmc_v10_0_late_init(void *handle)
+static int gmc_v10_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -769,10 +768,10 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v10_0_sw_init(void *handle)
+static int gmc_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->gfxhub.funcs->init(adev);
@@ -920,9 +919,9 @@ static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v10_0_sw_fini(void *handle)
+static int gmc_v10_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v10_0_gart_fini(adev);
@@ -966,7 +965,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
adev->hdp.funcs->init_registers(adev);
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
@@ -985,9 +984,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v10_0_hw_init(void *handle)
+static int gmc_v10_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
@@ -1032,9 +1031,9 @@ static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v10_0_hw_fini(void *handle)
+static int gmc_v10_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v10_0_gart_disable(adev);
@@ -1053,51 +1052,43 @@ static int gmc_v10_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v10_0_suspend(void *handle)
+static int gmc_v10_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v10_0_hw_fini(adev);
+ gmc_v10_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v10_0_resume(void *handle)
+static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v10_0_hw_init(adev);
+ r = gmc_v10_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v10_0_is_idle(void *handle)
+static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v10.*/
return true;
}
-static int gmc_v10_0_wait_for_idle(void *handle)
+static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v10.*/
return 0;
}
-static int gmc_v10_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v10_0_set_clockgating_state(void *handle,
+static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
@@ -1120,9 +1111,9 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
return athub_v2_0_set_clockgating(adev, state);
}
-static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4))
@@ -1136,7 +1127,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
athub_v2_0_get_clockgating(adev, flags);
}
-static int gmc_v10_0_set_powergating_state(void *handle,
+static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1154,7 +1145,6 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
.resume = gmc_v10_0_resume,
.is_idle = gmc_v10_0_is_idle,
.wait_for_idle = gmc_v10_0_wait_for_idle,
- .soft_reset = gmc_v10_0_soft_reset,
.set_clockgating_state = gmc_v10_0_set_clockgating_state,
.set_powergating_state = gmc_v10_0_set_powergating_state,
.get_clockgating_state = gmc_v10_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 2797fd84432b..72211409227b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -144,7 +144,10 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
addr, entry->client_id);
- if (!amdgpu_sriov_vf(adev))
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
}
@@ -226,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@@ -390,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- /* MES fw manages IH_VMID_x_LUT updating */
- if (ring->is_mes_queue)
- return;
-
if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
@@ -576,6 +575,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
break;
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
adev->mmhub.funcs = &mmhub_v3_3_funcs;
break;
default:
@@ -593,6 +593,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs;
break;
default:
@@ -601,9 +602,9 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
}
-static int gmc_v11_0_early_init(void *handle)
+static int gmc_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v11_0_set_gfxhub_funcs(adev);
gmc_v11_0_set_mmhub_funcs(adev);
@@ -622,9 +623,9 @@ static int gmc_v11_0_early_init(void *handle)
return 0;
}
-static int gmc_v11_0_late_init(void *handle)
+static int gmc_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -729,10 +730,10 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v11_0_sw_init(void *handle)
+static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->init(adev);
@@ -747,6 +748,18 @@ static int gmc_v11_0_sw_init(void *handle)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
@@ -756,6 +769,7 @@ static int gmc_v11_0_sw_init(void *handle)
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
case IP_VERSION(11, 5, 2):
+ case IP_VERSION(11, 5, 3):
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask);
/*
@@ -826,7 +840,7 @@ static int gmc_v11_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.first_kfd_vmid = 8;
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
amdgpu_vm_manager_init(adev);
@@ -849,9 +863,9 @@ static void gmc_v11_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v11_0_sw_fini(void *handle)
+static int gmc_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v11_0_gart_fini(adev);
@@ -893,7 +907,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
return r;
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
@@ -908,9 +922,9 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v11_0_hw_init(void *handle)
+static int gmc_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode;
@@ -940,9 +954,9 @@ static void gmc_v11_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v11_0_hw_fini(void *handle)
+static int gmc_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
@@ -961,51 +975,43 @@ static int gmc_v11_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v11_0_suspend(void *handle)
+static int gmc_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v11_0_hw_fini(adev);
+ gmc_v11_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v11_0_resume(void *handle)
+static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v11_0_hw_init(adev);
+ r = gmc_v11_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v11_0_is_idle(void *handle)
+static bool gmc_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v11.*/
return true;
}
-static int gmc_v11_0_wait_for_idle(void *handle)
+static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v11.*/
return 0;
}
-static int gmc_v11_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v11_0_set_clockgating_state(void *handle,
+static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
@@ -1014,16 +1020,16 @@ static int gmc_v11_0_set_clockgating_state(void *handle,
return athub_v3_0_set_clockgating(adev, state);
}
-static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v3_0_get_clockgating(adev, flags);
}
-static int gmc_v11_0_set_powergating_state(void *handle,
+static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1041,7 +1047,6 @@ const struct amd_ip_funcs gmc_v11_0_ip_funcs = {
.resume = gmc_v11_0_resume,
.is_idle = gmc_v11_0_is_idle,
.wait_for_idle = gmc_v11_0_wait_for_idle,
- .soft_reset = gmc_v11_0_soft_reset,
.set_clockgating_state = gmc_v11_0_set_clockgating_state,
.set_powergating_state = gmc_v11_0_set_powergating_state,
.get_clockgating_state = gmc_v11_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index edcb5351f8cc..b645d3e6a6c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -40,7 +40,7 @@
#include "gfxhub_v12_0.h"
#include "mmhub_v4_1_0.h"
#include "athub_v4_1_0.h"
-
+#include "umc_v8_14.h"
static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
@@ -137,7 +137,10 @@ static int gmc_v12_0_process_interrupt(struct amdgpu_device *adev,
dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n",
addr, entry->client_id);
- if (!amdgpu_sriov_vf(adev))
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (status != 0)
hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);
}
@@ -294,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
/* flush hdp cache */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@@ -410,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
struct amdgpu_device *adev = ring->adev;
uint32_t reg;
- /* MES fw manages IH_VMID_x_LUT updating */
- if (ring->is_mes_queue)
- return;
-
if (ring->vm_hub == AMDGPU_GFXHUB(0))
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
else
@@ -498,9 +497,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
uint64_t *flags)
{
struct amdgpu_bo *bo = mapping->bo_va->base.bo;
- struct amdgpu_device *bo_adev;
- bool coherent, is_system;
-
*flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
@@ -516,25 +512,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_VALID;
}
- if (!bo)
- return;
-
- if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
- AMDGPU_GEM_CREATE_UNCACHED))
- *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
-
- bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
- coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
- is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) ||
- (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT);
-
if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
*flags |= AMDGPU_PTE_DCC;
- /* WA for HW bug */
- if (is_system || ((bo_adev != adev) && coherent))
- *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
-
+ if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+ *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
}
static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
@@ -578,6 +560,18 @@ static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev)
static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev)
{
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 14, 0):
+ adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev);
+ adev->umc.node_inst_num = 0;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET;
+ adev->umc.ras = &umc_v8_14_ras;
+ break;
+ default:
+ break;
+ }
}
@@ -604,9 +598,9 @@ static void gmc_v12_0_set_gfxhub_funcs(struct amdgpu_device *adev)
}
}
-static int gmc_v12_0_early_init(void *handle)
+static int gmc_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v12_0_set_gfxhub_funcs(adev);
gmc_v12_0_set_mmhub_funcs(adev);
@@ -624,9 +618,9 @@ static int gmc_v12_0_early_init(void *handle)
return 0;
}
-static int gmc_v12_0_late_init(void *handle)
+static int gmc_v12_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -731,10 +725,10 @@ static int gmc_v12_0_gart_init(struct amdgpu_device *adev)
return amdgpu_gart_table_vram_alloc(adev);
}
-static int gmc_v12_0_sw_init(void *handle)
+static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->init(adev);
@@ -822,10 +816,14 @@ static int gmc_v12_0_sw_init(void *handle)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.first_kfd_vmid = 8;
+ adev->vm_manager.first_kfd_vmid = adev->gfx.disable_kq ? 1 : 8;
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
@@ -841,9 +839,9 @@ static void gmc_v12_0_gart_fini(struct amdgpu_device *adev)
amdgpu_gart_table_vram_free(adev);
}
-static int gmc_v12_0_sw_fini(void *handle)
+static int gmc_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_vm_manager_fini(adev);
gmc_v12_0_gart_fini(adev);
@@ -879,7 +877,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
return r;
/* Flush HDP after it is initialized */
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;
@@ -894,10 +892,10 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v12_0_hw_init(void *handle)
+static int gmc_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* The sequence of these two function calls matters.*/
gmc_v12_0_init_golden_registers(adev);
@@ -924,9 +922,9 @@ static void gmc_v12_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v12_0_hw_fini(void *handle)
+static int gmc_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
/* full access mode, so don't touch any GMC register */
@@ -945,51 +943,43 @@ static int gmc_v12_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v12_0_suspend(void *handle)
+static int gmc_v12_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v12_0_hw_fini(adev);
+ gmc_v12_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v12_0_resume(void *handle)
+static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v12_0_hw_init(adev);
+ r = gmc_v12_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v12_0_is_idle(void *handle)
+static bool gmc_v12_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v11.*/
return true;
}
-static int gmc_v12_0_wait_for_idle(void *handle)
+static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v11.*/
return 0;
}
-static int gmc_v12_0_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int gmc_v12_0_set_clockgating_state(void *handle,
+static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
@@ -998,16 +988,16 @@ static int gmc_v12_0_set_clockgating_state(void *handle,
return athub_v4_1_0_set_clockgating(adev, state);
}
-static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v4_1_0_get_clockgating(adev, flags);
}
-static int gmc_v12_0_set_powergating_state(void *handle,
+static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1025,7 +1015,6 @@ const struct amd_ip_funcs gmc_v12_0_ip_funcs = {
.resume = gmc_v12_0_resume,
.is_idle = gmc_v12_0_is_idle,
.wait_for_idle = gmc_v12_0_wait_for_idle,
- .soft_reset = gmc_v12_0_soft_reset,
.set_clockgating_state = gmc_v12_0_set_clockgating_state,
.set_powergating_state = gmc_v12_0_set_powergating_state,
.get_clockgating_state = gmc_v12_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index d36725666b54..8030fcd64210 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -43,7 +43,7 @@
static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v6_0_wait_for_idle(void *handle);
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
@@ -64,8 +64,13 @@ MODULE_FIRMWARE("amdgpu/si58_mc.bin");
static void gmc_v6_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v6_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v6_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -126,7 +131,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
chip_name = "si58";
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
dev_err(adev->dev,
"si_mc: Failed to load firmware \"%s_mc.bin\"\n",
@@ -213,6 +219,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
{
int i, j;
+ struct amdgpu_ip_block *ip_block;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
@@ -224,7 +232,11 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v6_0_wait_for_idle((void *)adev))
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -237,7 +249,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
/* disable VGA render */
tmp = RREG32(mmVGA_RENDER_CONTROL);
- tmp &= ~VGA_VSTATUS_CNTL;
+ tmp &= VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK;
WREG32(mmVGA_RENDER_CONTROL, tmp);
}
/* Update configuration */
@@ -251,7 +263,7 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v6_0_wait_for_idle((void *)adev))
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
}
@@ -615,17 +627,16 @@ static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
"write" : "read", block, mc_client, mc_id);
}
-/*
static const u32 mc_cg_registers[] = {
- MC_HUB_MISC_HUB_CG,
- MC_HUB_MISC_SIP_CG,
- MC_HUB_MISC_VM_CG,
- MC_XPB_CLK_GAT,
- ATC_MISC_CG,
- MC_CITF_MISC_WR_CG,
- MC_CITF_MISC_RD_CG,
- MC_CITF_MISC_VM_CG,
- VM_L2_CG,
+ mmMC_HUB_MISC_HUB_CG,
+ mmMC_HUB_MISC_SIP_CG,
+ mmMC_HUB_MISC_VM_CG,
+ mmMC_XPB_CLK_GAT,
+ mmATC_MISC_CG,
+ mmMC_CITF_MISC_WR_CG,
+ mmMC_CITF_MISC_RD_CG,
+ mmMC_CITF_MISC_VM_CG,
+ mmVM_L2_CG,
};
static const u32 mc_cg_ls_en[] = {
@@ -660,7 +671,7 @@ static void gmc_v6_0_enable_mc_ls(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= mc_cg_ls_en[i];
else
data &= ~mc_cg_ls_en[i];
@@ -677,7 +688,7 @@ static void gmc_v6_0_enable_mc_mgcg(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= mc_cg_en[i];
else
data &= ~mc_cg_en[i];
@@ -693,7 +704,7 @@ static void gmc_v6_0_enable_bif_mgls(struct amdgpu_device *adev,
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -716,7 +727,7 @@ static void gmc_v6_0_enable_hdp_mgcg(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
else
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -732,7 +743,7 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_MEM_POWER_LS);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
else
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
@@ -740,7 +751,6 @@ static void gmc_v6_0_enable_hdp_ls(struct amdgpu_device *adev,
if (orig != data)
WREG32(mmHDP_MEM_POWER_LS, data);
}
-*/
static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
{
@@ -762,9 +772,9 @@ static int gmc_v6_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v6_0_early_init(void *handle)
+static int gmc_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_set_gmc_funcs(adev);
gmc_v6_0_set_irq_funcs(adev);
@@ -772,9 +782,9 @@ static int gmc_v6_0_early_init(void *handle)
return 0;
}
-static int gmc_v6_0_late_init(void *handle)
+static int gmc_v6_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -799,10 +809,10 @@ static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v6_0_sw_init(void *handle)
+static int gmc_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -876,9 +886,9 @@ static int gmc_v6_0_sw_init(void *handle)
return 0;
}
-static int gmc_v6_0_sw_fini(void *handle)
+static int gmc_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -889,10 +899,10 @@ static int gmc_v6_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_hw_init(void *handle)
+static int gmc_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v6_0_mc_program(adev);
@@ -914,9 +924,9 @@ static int gmc_v6_0_hw_init(void *handle)
return 0;
}
-static int gmc_v6_0_hw_fini(void *handle)
+static int gmc_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v6_0_gart_disable(adev);
@@ -924,21 +934,19 @@ static int gmc_v6_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v6_0_suspend(void *handle)
+static int gmc_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v6_0_hw_fini(adev);
+ gmc_v6_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v6_0_resume(void *handle)
+static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = gmc_v6_0_hw_init(adev);
+ r = gmc_v6_0_hw_init(ip_block);
if (r)
return r;
@@ -947,9 +955,10 @@ static int gmc_v6_0_resume(void *handle)
return 0;
}
-static bool gmc_v6_0_is_idle(void *handle)
+static bool gmc_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -959,13 +968,13 @@ static bool gmc_v6_0_is_idle(void *handle)
return true;
}
-static int gmc_v6_0_wait_for_idle(void *handle)
+static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (gmc_v6_0_is_idle(handle))
+ if (gmc_v6_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -973,9 +982,10 @@ static int gmc_v6_0_wait_for_idle(void *handle)
}
-static int gmc_v6_0_soft_reset(void *handle)
+static int gmc_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -992,7 +1002,8 @@ static int gmc_v6_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v6_0_mc_stop(adev);
- if (gmc_v6_0_wait_for_idle(adev))
+
+ if (gmc_v6_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
@@ -1082,13 +1093,27 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v6_0_set_clockgating_state(void *handle,
+static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool gate = false;
+
+ if (state == AMD_CG_STATE_GATE)
+ gate = true;
+
+ if (!(adev->flags & AMD_IS_APU)) {
+ gmc_v6_0_enable_mc_mgcg(adev, gate);
+ gmc_v6_0_enable_mc_ls(adev, gate);
+ }
+ gmc_v6_0_enable_bif_mgls(adev, gate);
+ gmc_v6_0_enable_hdp_mgcg(adev, gate);
+ gmc_v6_0_enable_hdp_ls(adev, gate);
+
return 0;
}
-static int gmc_v6_0_set_powergating_state(void *handle,
+static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1109,8 +1134,6 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
.soft_reset = gmc_v6_0_soft_reset,
.set_clockgating_state = gmc_v6_0_set_clockgating_state,
.set_powergating_state = gmc_v6_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 994432fb57ea..a8d5795084fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -52,7 +52,7 @@
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v7_0_wait_for_idle(void *handle);
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
@@ -87,9 +87,14 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v7_0_mc_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 blackout;
- gmc_v7_0_wait_for_idle((void *)adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v7_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -152,7 +157,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
@@ -251,9 +257,14 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
/* Initialize HDP */
for (i = 0, j = 0; i < 32; i++, j += 0x6) {
WREG32((0xb05 + j), 0x00000000);
@@ -264,7 +275,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -288,7 +299,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_AGP_BASE, 0);
WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -921,9 +932,9 @@ static int gmc_v7_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v7_0_early_init(void *handle)
+static int gmc_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_set_gmc_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
@@ -940,9 +951,9 @@ static int gmc_v7_0_early_init(void *handle)
return 0;
}
-static int gmc_v7_0_late_init(void *handle)
+static int gmc_v7_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -968,10 +979,10 @@ static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
return size;
}
-static int gmc_v7_0_sw_init(void *handle)
+static int gmc_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -1060,9 +1071,9 @@ static int gmc_v7_0_sw_init(void *handle)
return 0;
}
-static int gmc_v7_0_sw_fini(void *handle)
+static int gmc_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -1074,10 +1085,10 @@ static int gmc_v7_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_hw_init(void *handle)
+static int gmc_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v7_0_init_golden_registers(adev);
@@ -1101,9 +1112,9 @@ static int gmc_v7_0_hw_init(void *handle)
return 0;
}
-static int gmc_v7_0_hw_fini(void *handle)
+static int gmc_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
@@ -1111,32 +1122,29 @@ static int gmc_v7_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v7_0_suspend(void *handle)
+static int gmc_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v7_0_hw_fini(adev);
+ gmc_v7_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v7_0_resume(void *handle)
+static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v7_0_hw_init(adev);
+ r = gmc_v7_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v7_0_is_idle(void *handle)
+static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1146,20 +1154,13 @@ static bool gmc_v7_0_is_idle(void *handle)
return true;
}
-static int gmc_v7_0_wait_for_idle(void *handle)
+static int gmc_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
- u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- /* read MC_STATUS */
- tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK |
- SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK |
- SRBM_STATUS__MCD_BUSY_MASK |
- SRBM_STATUS__VMC_BUSY_MASK);
- if (!tmp)
+ if (gmc_v7_0_is_idle(ip_block))
return 0;
udelay(1);
}
@@ -1167,9 +1168,9 @@ static int gmc_v7_0_wait_for_idle(void *handle)
}
-static int gmc_v7_0_soft_reset(void *handle)
+static int gmc_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1186,7 +1187,7 @@ static int gmc_v7_0_soft_reset(void *handle)
if (srbm_soft_reset) {
gmc_v7_0_mc_stop(adev);
- if (gmc_v7_0_wait_for_idle((void *)adev))
+ if (gmc_v7_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
tmp = RREG32(mmSRBM_SOFT_RESET);
@@ -1310,11 +1311,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int gmc_v7_0_set_clockgating_state(void *handle,
+static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -1330,7 +1331,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v7_0_set_powergating_state(void *handle,
+static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1351,8 +1352,6 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
.soft_reset = gmc_v7_0_soft_reset,
.set_clockgating_state = gmc_v7_0_set_clockgating_state,
.set_powergating_state = gmc_v7_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 86488c052f82..99ca08e9bdb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -53,7 +53,7 @@
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
-static int gmc_v8_0_wait_for_idle(void *handle);
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -170,8 +170,13 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
static void gmc_v8_0_mc_stop(struct amdgpu_device *adev)
{
u32 blackout;
+ struct amdgpu_ip_block *ip_block;
- gmc_v8_0_wait_for_idle(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ gmc_v8_0_wait_for_idle(ip_block);
blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -254,7 +259,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
return -EINVAL;
}
- err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name);
+ err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_mc.bin", chip_name);
if (err) {
pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name);
amdgpu_ucode_release(&adev->gmc.fw);
@@ -426,6 +432,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
u32 tmp;
int i, j;
@@ -439,7 +446,11 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
- if (gmc_v8_0_wait_for_idle((void *)adev))
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
+ if (!ip_block)
+ return;
+
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
if (adev->mode_info.num_crtc) {
@@ -474,7 +485,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmMC_VM_AGP_BASE, 0);
WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22);
WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22);
- if (gmc_v8_0_wait_for_idle((void *)adev))
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for MC idle timedout !\n");
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
@@ -1027,9 +1038,9 @@ static int gmc_v8_0_convert_vram_type(int mc_seq_vram_type)
}
}
-static int gmc_v8_0_early_init(void *handle)
+static int gmc_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_set_gmc_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
@@ -1046,9 +1057,9 @@ static int gmc_v8_0_early_init(void *handle)
return 0;
}
-static int gmc_v8_0_late_init(void *handle)
+static int gmc_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
@@ -1076,10 +1087,10 @@ static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
#define mmMC_SEQ_MISC0_FIJI 0xA71
-static int gmc_v8_0_sw_init(void *handle)
+static int gmc_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask);
@@ -1173,9 +1184,9 @@ static int gmc_v8_0_sw_init(void *handle)
return 0;
}
-static int gmc_v8_0_sw_fini(void *handle)
+static int gmc_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
@@ -1187,10 +1198,10 @@ static int gmc_v8_0_sw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_hw_init(void *handle)
+static int gmc_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v8_0_init_golden_registers(adev);
@@ -1222,9 +1233,9 @@ static int gmc_v8_0_hw_init(void *handle)
return 0;
}
-static int gmc_v8_0_hw_fini(void *handle)
+static int gmc_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
@@ -1232,32 +1243,29 @@ static int gmc_v8_0_hw_fini(void *handle)
return 0;
}
-static int gmc_v8_0_suspend(void *handle)
+static int gmc_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- gmc_v8_0_hw_fini(adev);
+ gmc_v8_0_hw_fini(ip_block);
return 0;
}
-static int gmc_v8_0_resume(void *handle)
+static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v8_0_hw_init(adev);
+ r = gmc_v8_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v8_0_is_idle(void *handle)
+static bool gmc_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
@@ -1267,11 +1275,11 @@ static bool gmc_v8_0_is_idle(void *handle)
return true;
}
-static int gmc_v8_0_wait_for_idle(void *handle)
+static int gmc_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned int i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -1289,10 +1297,10 @@ static int gmc_v8_0_wait_for_idle(void *handle)
}
-static bool gmc_v8_0_check_soft_reset(void *handle)
+static bool gmc_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
@@ -1316,23 +1324,23 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
return false;
}
-static int gmc_v8_0_pre_soft_reset(void *handle)
+static int gmc_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_stop(adev);
- if (gmc_v8_0_wait_for_idle(adev))
+ if (gmc_v8_0_wait_for_idle(ip_block))
dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
return 0;
}
-static int gmc_v8_0_soft_reset(void *handle)
+static int gmc_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->gmc.srbm_soft_reset)
@@ -1361,9 +1369,9 @@ static int gmc_v8_0_soft_reset(void *handle)
return 0;
}
-static int gmc_v8_0_post_soft_reset(void *handle)
+static int gmc_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->gmc.srbm_soft_reset)
return 0;
@@ -1651,10 +1659,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev,
}
}
-static int gmc_v8_0_set_clockgating_state(void *handle,
+static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1672,15 +1680,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle,
return 0;
}
-static int gmc_v8_0_set_powergating_state(void *handle,
+static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1715,8 +1723,6 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
.set_clockgating_state = gmc_v8_0_set_clockgating_state,
.set_powergating_state = gmc_v8_0_set_powergating_state,
.get_clockgating_state = gmc_v8_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index c76ac0dfe572..282197f4ffb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -411,6 +411,11 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
(0x001d43e0 + 0x00001800),
};
+static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev)
+{
+ return !!adev->aid_mask;
+}
+
static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned int type,
@@ -623,6 +628,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
}
}
+ if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
+ return 1;
+
if (!printk_ratelimit())
return 0;
@@ -644,8 +652,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ if (gmc_v9_0_is_multi_chiplet(adev))
dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
@@ -672,6 +679,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
(amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
return 0;
+ /* Only print L2 fault status if the status register could be read and
+ * contains useful information
+ */
+ if (!status)
+ return 0;
+
if (!amdgpu_sriov_vf(adev))
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
@@ -788,8 +801,7 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ gmc_v9_0_is_multi_chiplet(adev))
return false;
return ((vmhub == AMDGPU_MMHUB0(0) ||
@@ -1124,16 +1136,21 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
uint64_t *flags)
{
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
- bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
- bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT);
+ bool is_vram = bo->tbo.resource &&
+ bo->tbo.resource->mem_type == TTM_PL_VRAM;
+ bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+ AMDGPU_GEM_CREATE_EXT_COHERENT);
bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
struct amdgpu_vm *vm = mapping->bo_va->base.vm;
unsigned int mtype_local, mtype;
+ uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
bool snoop = false;
bool is_local;
- switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ dma_resv_assert_held(bo->tbo.base.resv);
+
+ switch (gc_ip_version) {
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
if (is_vram) {
@@ -1147,10 +1164,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* FIXME: is this still needed? Or does
* amdgpu_ttm_tt_pde_flags already handle this?
*/
- if ((amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) ==
- IP_VERSION(9, 4, 3)) &&
+ if (gc_ip_version == IP_VERSION(9, 4, 2) &&
adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
@@ -1174,6 +1188,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
/* Only local VRAM BOs or system memory on non-NUMA APUs
* can be assumed to be local in their entirety. Choose
* MTYPE_NC as safe fallback for all system memory BOs on
@@ -1198,20 +1213,17 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
if (uncached) {
mtype = MTYPE_UC;
} else if (ext_coherent) {
- if (adev->rev_id)
- mtype = is_local ? MTYPE_CC : MTYPE_UC;
- else
- mtype = MTYPE_UC;
+ mtype = is_local ? MTYPE_CC : MTYPE_UC;
} else if (adev->flags & AMD_IS_APU) {
mtype = is_local ? mtype_local : MTYPE_NC;
} else {
/* dGPU */
if (is_local)
mtype = mtype_local;
- else if (is_vram)
- mtype = MTYPE_NC;
- else
+ else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram)
mtype = MTYPE_UC;
+ else
+ mtype = MTYPE_NC;
}
break;
@@ -1251,9 +1263,8 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
*flags &= ~AMDGPU_PTE_VALID;
}
- if (bo && bo->tbo.resource)
- gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
- mapping, flags);
+ if ((*flags & AMDGPU_PTE_VALID) && bo)
+ gmc_v9_0_get_coherence_flags(adev, bo, mapping, flags);
}
static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
@@ -1265,8 +1276,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
* memory can use more efficient MTYPEs.
*/
- if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) &&
- amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4))
+ if (!(adev->flags & AMD_IS_APU) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
return;
/* Only direct-mapped memory allows us to determine the NUMA node from
@@ -1322,7 +1333,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
mtype_local = MTYPE_CC;
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
- } else if (adev->rev_id) {
+ } else {
/* MTYPE_UC case */
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
}
@@ -1387,14 +1398,44 @@ gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes)
}
static enum amdgpu_memory_partition
+gmc_v9_0_query_vf_memory_partition(struct amdgpu_device *adev)
+{
+ switch (adev->gmc.num_mem_partitions) {
+ case 0:
+ return UNKNOWN_MEMORY_PARTITION_MODE;
+ case 1:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ case 2:
+ return AMDGPU_NPS2_PARTITION_MODE;
+ case 4:
+ return AMDGPU_NPS4_PARTITION_MODE;
+ default:
+ return AMDGPU_NPS1_PARTITION_MODE;
+ }
+
+ return AMDGPU_NPS1_PARTITION_MODE;
+}
+
+static enum amdgpu_memory_partition
gmc_v9_0_query_memory_partition(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev))
- return AMDGPU_NPS1_PARTITION_MODE;
+ return gmc_v9_0_query_vf_memory_partition(adev);
return gmc_v9_0_get_memory_partition(adev, NULL);
}
+static bool gmc_v9_0_need_reset_on_init(struct amdgpu_device *adev)
+{
+ if (adev->nbio.funcs && adev->nbio.funcs->is_nps_switch_requested &&
+ adev->nbio.funcs->is_nps_switch_requested(adev)) {
+ adev->gmc.reset_flags |= AMDGPU_GMC_INIT_RESET_NPS;
+ return true;
+ }
+
+ return false;
+}
+
static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
.flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
@@ -1406,6 +1447,8 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
.query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
+ .request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
+ .need_reset_on_init = &gmc_v9_0_need_reset_on_init,
};
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
@@ -1452,14 +1495,13 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0];
break;
case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 5, 0):
adev->umc.max_ras_err_cnt_per_query =
UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM;
adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM;
adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
- adev->umc.active_mask = adev->aid_mask;
- adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
adev->umc.ras = &umc_v12_0_ras;
break;
@@ -1478,6 +1520,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
adev->mmhub.funcs = &mmhub_v1_7_funcs;
break;
case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
adev->mmhub.funcs = &mmhub_v1_8_funcs;
break;
default:
@@ -1499,6 +1542,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
adev->mmhub.ras = &mmhub_v1_7_ras;
break;
case IP_VERSION(1, 8, 0):
+ case IP_VERSION(1, 8, 1):
adev->mmhub.ras = &mmhub_v1_8_ras;
break;
default:
@@ -1509,8 +1553,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ if (gmc_v9_0_is_multi_chiplet(adev))
adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
else
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
@@ -1545,9 +1588,46 @@ static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev)
adev->gmc.xgmi.ras = &xgmi_ras;
}
-static int gmc_v9_0_early_init(void *handle)
+static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ enum amdgpu_memory_partition mode;
+ uint32_t supp_modes;
+ int i;
+
+ adev->gmc.supported_nps_modes = 0;
+
+ if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+ return;
+
+ mode = gmc_v9_0_get_memory_partition(adev, &supp_modes);
+
+ /* Mode detected by hardware and supported modes available */
+ if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) {
+ while ((i = ffs(supp_modes))) {
+ if (AMDGPU_ALL_NPS_MASK & BIT(i))
+ adev->gmc.supported_nps_modes |= BIT(i);
+ supp_modes &= supp_modes - 1;
+ }
+ } else {
+ /*TODO: Check PSP version also which supports NPS switch. Otherwise keep
+ * supported modes as 0.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ adev->gmc.supported_nps_modes =
+ BIT(AMDGPU_NPS1_PARTITION_MODE) |
+ BIT(AMDGPU_NPS4_PARTITION_MODE);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
/*
* 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined
@@ -1555,8 +1635,7 @@ static int gmc_v9_0_early_init(void *handle)
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ gmc_v9_0_is_multi_chiplet(adev))
adev->gmc.xgmi.supported = true;
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
@@ -1565,8 +1644,7 @@ static int gmc_v9_0_early_init(void *handle)
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
enum amdgpu_pkg_type pkg_type =
adev->smuio.funcs->get_pkg_type(adev);
/* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
@@ -1601,9 +1679,9 @@ static int gmc_v9_0_early_init(void *handle)
return 0;
}
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_gmc_allocate_vm_inv_eng(adev);
@@ -1729,6 +1807,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@@ -1900,6 +1979,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
switch (mode) {
case UNKNOWN_MEMORY_PARTITION_MODE:
+ adev->gmc.num_mem_partitions = 0;
+ break;
case AMDGPU_NPS1_PARTITION_MODE:
adev->gmc.num_mem_partitions = 1;
break;
@@ -1919,7 +2000,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
/* Use NPS range info, if populated */
r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
- adev->gmc.num_mem_partitions);
+ &adev->gmc.num_mem_partitions);
if (!r) {
l = 0;
for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
@@ -1929,6 +2010,11 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
}
} else {
+ if (!adev->gmc.num_mem_partitions) {
+ dev_err(adev->dev,
+ "Not able to detect NPS mode, fall back to NPS1");
+ adev->gmc.num_mem_partitions = 1;
+ }
/* Fallback to sw based calculation */
size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
size /= adev->gmc.num_mem_partitions;
@@ -1985,12 +2071,15 @@ static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev)
{
adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM;
adev->gmc.vram_width = 128 * 64;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM3E;
}
-static int gmc_v9_0_sw_init(void *handle)
+static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned long inst_mask = adev->aid_mask;
adev->gfxhub.funcs->init(adev);
@@ -1999,8 +2088,7 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ if (gmc_v9_0_is_multi_chiplet(adev)) {
gmc_v9_4_3_init_vram_info(adev);
} else if (!adev->bios) {
if (adev->flags & AMD_IS_APU) {
@@ -2084,6 +2172,7 @@ static int gmc_v9_0_sw_init(void *handle)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
NUM_XCC(adev->gfx.xcc_mask));
@@ -2149,8 +2238,7 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ if (gmc_v9_0_is_multi_chiplet(adev)) {
r = gmc_v9_0_init_mem_ranges(adev);
if (r)
return r;
@@ -2165,6 +2253,7 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
+ gmc_v9_0_init_nps_details(adev);
/*
* number of VMs
* VMID 0 is reserved for System
@@ -2178,8 +2267,7 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.first_kfd_vmid =
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ?
+ gmc_v9_0_is_multi_chiplet(adev)) ?
3 :
8;
@@ -2191,19 +2279,17 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ if (gmc_v9_0_is_multi_chiplet(adev))
amdgpu_gmc_sysfs_init(adev);
return 0;
}
-static int gmc_v9_0_sw_fini(void *handle)
+static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
+ if (gmc_v9_0_is_multi_chiplet(adev))
amdgpu_gmc_sysfs_fini(adev);
amdgpu_gmc_ras_fini(adev);
@@ -2308,9 +2394,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v9_0_hw_init(void *handle)
+static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool value;
int i, r;
@@ -2324,13 +2410,6 @@ static int gmc_v9_0_hw_init(void *handle)
adev->gmc.flush_tlb_needs_extra_type_2 =
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) &&
adev->gmc.xgmi.num_physical_nodes;
- /*
- * TODO: This workaround is badly documented and had a buggy
- * implementation. We should probably verify what we do here.
- */
- adev->gmc.flush_tlb_needs_extra_type_0 =
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
- adev->rev_id == 0;
/* The sequence of these two function calls matters.*/
gmc_v9_0_init_golden_registers(adev);
@@ -2348,7 +2427,7 @@ static int gmc_v9_0_hw_init(void *handle)
adev->hdp.funcs->init_registers(adev);
/* After HDP is initialized, flush HDP.*/
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
@@ -2393,9 +2472,9 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
adev->mmhub.funcs->gart_disable(adev);
}
-static int gmc_v9_0_hw_fini(void *handle)
+static int gmc_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
gmc_v9_0_gart_disable(adev);
@@ -2413,58 +2492,70 @@ static int gmc_v9_0_hw_fini(void *handle)
if (adev->mmhub.funcs->update_power_gating)
adev->mmhub.funcs->update_power_gating(adev, false);
- amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ /*
+ * For minimal init, late_init is not called, hence VM fault/RAS irqs
+ * are not enabled.
+ */
+ if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
- if (adev->gmc.ecc_irq.funcs &&
- amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
- amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ if (adev->gmc.ecc_irq.funcs &&
+ amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC))
+ amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
+ }
return 0;
}
-static int gmc_v9_0_suspend(void *handle)
+static int gmc_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return gmc_v9_0_hw_fini(adev);
+ return gmc_v9_0_hw_fini(ip_block);
}
-static int gmc_v9_0_resume(void *handle)
+static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
+ struct amdgpu_device *adev = ip_block->adev;
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = gmc_v9_0_hw_init(adev);
+ /* If a reset is done for NPS mode switch, read the memory range
+ * information again.
+ */
+ if (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS) {
+ gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions);
+ adev->gmc.reset_flags &= ~AMDGPU_GMC_INIT_RESET_NPS;
+ }
+
+ r = gmc_v9_0_hw_init(ip_block);
if (r)
return r;
- amdgpu_vmid_reset_all(adev);
+ amdgpu_vmid_reset_all(ip_block->adev);
return 0;
}
-static bool gmc_v9_0_is_idle(void *handle)
+static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* MC is always ready in GMC v9.*/
return true;
}
-static int gmc_v9_0_wait_for_idle(void *handle)
+static int gmc_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* There is no need to wait for MC idle in GMC v9.*/
return 0;
}
-static int gmc_v9_0_soft_reset(void *handle)
+static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* XXX for emulation.*/
return 0;
}
-static int gmc_v9_0_set_clockgating_state(void *handle,
+static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->set_clockgating(adev, state);
@@ -2473,16 +2564,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
+static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->mmhub.funcs->get_clockgating(adev, flags);
athub_v1_0_get_clockgating(adev, flags);
}
-static int gmc_v9_0_set_powergating_state(void *handle,
+static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index e019249883fb..e6c0d86d3486 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v4_0.h"
#include "amdgpu_ras.h"
@@ -37,15 +36,6 @@
#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L
#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0
-static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -54,11 +44,13 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
return;
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
- else
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
}
static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
@@ -177,7 +169,7 @@ struct amdgpu_hdp_ras hdp_v4_0_ras = {
};
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
- .flush_hdp = hdp_v4_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index ed7facacf2fe..8bc001dc9f63 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -21,27 +21,18 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_0.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "hdp/hdp_5_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg) {
WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
} else {
amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
@@ -215,7 +206,7 @@ static void hdp_v5_0_init_registers(struct amdgpu_device *adev)
}
const struct amdgpu_hdp_funcs hdp_v5_0_funcs = {
- .flush_hdp = hdp_v5_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.invalidate_hdp = hdp_v5_0_invalidate_hdp,
.update_clock_gating = hdp_v5_0_update_clock_gating,
.get_clock_gating_state = hdp_v5_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 29c3484ae1f1..40940b4ab400 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v5_2.h"
#include "hdp/hdp_5_2_1_offset.h"
@@ -31,13 +30,25 @@
static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
- if (!ring || !ring->funcs->emit_wreg)
+ if (!ring || !ring->funcs->emit_wreg) {
WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
- else
+ if (amdgpu_sriov_vf(adev)) {
+ /* this is fine because SR_IOV doesn't remap the register */
+ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
+ } else {
+ /* We just need to read back a register to post the write.
+ * Reading back the remapped register causes problems on
+ * some platforms so just read back the memory size register.
+ */
+ if (adev->nbio.funcs->get_memsize)
+ adev->nbio.funcs->get_memsize(adev);
+ }
+ } else {
amdgpu_ring_emit_wreg(ring,
(adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
0);
+ }
}
static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
index 33736d361dd0..ec20daf4272c 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v6_0.h"
#include "hdp/hdp_6_0_0_offset.h"
@@ -31,15 +30,6 @@
#define regHDP_CLK_CNTL_V6_1 0xd5
#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0
-static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -148,7 +138,7 @@ static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev,
}
const struct amdgpu_hdp_funcs hdp_v6_0_funcs = {
- .flush_hdp = hdp_v6_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.update_clock_gating = hdp_v6_0_update_clock_gating,
.get_clock_gating_state = hdp_v6_0_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
index 1c99bb09e2a1..ed1debc03507 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
@@ -21,22 +21,12 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "hdp_v7_0.h"
#include "hdp/hdp_7_0_0_offset.h"
#include "hdp/hdp_7_0_0_sh_mask.h"
#include <uapi/linux/kfd_ioctl.h>
-static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- if (!ring || !ring->funcs->emit_wreg)
- WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
- else
- amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
-}
-
static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
bool enable)
{
@@ -136,7 +126,7 @@ static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev,
}
const struct amdgpu_hdp_funcs hdp_v7_0_funcs = {
- .flush_hdp = hdp_v7_0_flush_hdp,
+ .flush_hdp = amdgpu_hdp_generic_flush,
.update_clock_gating = hdp_v7_0_update_clock_gating,
.get_clock_gating_state = hdp_v7_0_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 07984f7c3ae7..1317ede131b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -273,9 +273,9 @@ static void iceland_ih_set_rptr(struct amdgpu_device *adev,
WREG32(mmIH_RB_RPTR, ih->rptr);
}
-static int iceland_ih_early_init(void *handle)
+static int iceland_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -287,10 +287,10 @@ static int iceland_ih_early_init(void *handle)
return 0;
}
-static int iceland_ih_sw_init(void *handle)
+static int iceland_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -301,9 +301,9 @@ static int iceland_ih_sw_init(void *handle)
return r;
}
-static int iceland_ih_sw_fini(void *handle)
+static int iceland_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -311,39 +311,33 @@ static int iceland_ih_sw_fini(void *handle)
return 0;
}
-static int iceland_ih_hw_init(void *handle)
+static int iceland_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return iceland_ih_irq_init(adev);
}
-static int iceland_ih_hw_fini(void *handle)
+static int iceland_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- iceland_ih_irq_disable(adev);
+ iceland_ih_irq_disable(ip_block->adev);
return 0;
}
-static int iceland_ih_suspend(void *handle)
+static int iceland_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_fini(adev);
+ return iceland_ih_hw_fini(ip_block);
}
-static int iceland_ih_resume(void *handle)
+static int iceland_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return iceland_ih_hw_init(adev);
+ return iceland_ih_hw_init(ip_block);
}
-static bool iceland_ih_is_idle(void *handle)
+static bool iceland_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -352,11 +346,11 @@ static bool iceland_ih_is_idle(void *handle)
return true;
}
-static int iceland_ih_wait_for_idle(void *handle)
+static int iceland_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -368,10 +362,10 @@ static int iceland_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int iceland_ih_soft_reset(void *handle)
+static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
@@ -398,13 +392,13 @@ static int iceland_ih_soft_reset(void *handle)
return 0;
}
-static int iceland_ih_set_clockgating_state(void *handle,
+static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int iceland_ih_set_powergating_state(void *handle,
+static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -413,7 +407,6 @@ static int iceland_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.name = "iceland_ih",
.early_init = iceland_ih_early_init,
- .late_init = NULL,
.sw_init = iceland_ih_sw_init,
.sw_fini = iceland_ih_sw_fini,
.hw_init = iceland_ih_hw_init,
@@ -425,8 +418,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.soft_reset = iceland_ih_soft_reset,
.set_clockgating_state = iceland_ih_set_clockgating_state,
.set_powergating_state = iceland_ih_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs iceland_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index 18a761d6ef33..5900b560b7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -349,6 +349,7 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
if (ret)
return ret;
}
+ ih[i]->overflow = false;
}
/* update doorbell range for ih ring 0 */
@@ -446,7 +447,10 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev,
wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
@@ -559,19 +563,19 @@ static void ih_v6_0_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &ih_v6_0_self_irq_funcs;
}
-static int ih_v6_0_early_init(void *handle)
+static int ih_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_0_set_interrupt_funcs(adev);
ih_v6_0_set_self_irq_funcs(adev);
return 0;
}
-static int ih_v6_0_sw_init(void *handle)
+static int ih_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
@@ -614,19 +618,19 @@ static int ih_v6_0_sw_init(void *handle)
return r;
}
-static int ih_v6_0_sw_fini(void *handle)
+static int ih_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int ih_v6_0_hw_init(void *handle)
+static int ih_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = ih_v6_0_irq_init(adev);
if (r)
@@ -635,42 +639,36 @@ static int ih_v6_0_hw_init(void *handle)
return 0;
}
-static int ih_v6_0_hw_fini(void *handle)
+static int ih_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- ih_v6_0_irq_disable(adev);
+ ih_v6_0_irq_disable(ip_block->adev);
return 0;
}
-static int ih_v6_0_suspend(void *handle)
+static int ih_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_0_hw_fini(adev);
+ return ih_v6_0_hw_fini(ip_block);
}
-static int ih_v6_0_resume(void *handle)
+static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_0_hw_init(adev);
+ return ih_v6_0_hw_init(ip_block);
}
-static bool ih_v6_0_is_idle(void *handle)
+static bool ih_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int ih_v6_0_wait_for_idle(void *handle)
+static int ih_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int ih_v6_0_soft_reset(void *handle)
+static int ih_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -699,10 +697,10 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int ih_v6_0_set_clockgating_state(void *handle,
+static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -762,10 +760,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_0_set_powergating_state(void *handle,
+static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
@@ -774,9 +772,9 @@ static int ih_v6_0_set_powergating_state(void *handle,
return 0;
}
-static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void ih_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
@@ -785,7 +783,6 @@ static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
.name = "ih_v6_0",
.early_init = ih_v6_0_early_init,
- .late_init = NULL,
.sw_init = ih_v6_0_sw_init,
.sw_fini = ih_v6_0_sw_fini,
.hw_init = ih_v6_0_hw_init,
@@ -798,8 +795,6 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
.set_clockgating_state = ih_v6_0_set_clockgating_state,
.set_powergating_state = ih_v6_0_set_powergating_state,
.get_clockgating_state = ih_v6_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index 2e0469feca1e..068ed849dbad 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -532,9 +532,9 @@ static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs;
}
-static int ih_v6_1_early_init(void *handle)
+static int ih_v6_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -547,10 +547,10 @@ static int ih_v6_1_early_init(void *handle)
return 0;
}
-static int ih_v6_1_sw_init(void *handle)
+static int ih_v6_1_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
@@ -593,19 +593,19 @@ static int ih_v6_1_sw_init(void *handle)
return r;
}
-static int ih_v6_1_sw_fini(void *handle)
+static int ih_v6_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int ih_v6_1_hw_init(void *handle)
+static int ih_v6_1_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = ih_v6_1_irq_init(adev);
if (r)
@@ -614,42 +614,36 @@ static int ih_v6_1_hw_init(void *handle)
return 0;
}
-static int ih_v6_1_hw_fini(void *handle)
+static int ih_v6_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- ih_v6_1_irq_disable(adev);
+ ih_v6_1_irq_disable(ip_block->adev);
return 0;
}
-static int ih_v6_1_suspend(void *handle)
+static int ih_v6_1_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_1_hw_fini(adev);
+ return ih_v6_1_hw_fini(ip_block);
}
-static int ih_v6_1_resume(void *handle)
+static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v6_1_hw_init(adev);
+ return ih_v6_1_hw_init(ip_block);
}
-static bool ih_v6_1_is_idle(void *handle)
+static bool ih_v6_1_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int ih_v6_1_wait_for_idle(void *handle)
+static int ih_v6_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int ih_v6_1_soft_reset(void *handle)
+static int ih_v6_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -680,10 +674,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev,
return;
}
-static int ih_v6_1_set_clockgating_state(void *handle,
+static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v6_1_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -743,10 +737,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v6_1_set_powergating_state(void *handle,
+static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
@@ -755,9 +749,9 @@ static int ih_v6_1_set_powergating_state(void *handle,
return 0;
}
-static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
+static void ih_v6_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
@@ -768,7 +762,6 @@ static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
.name = "ih_v6_1",
.early_init = ih_v6_1_early_init,
- .late_init = NULL,
.sw_init = ih_v6_1_sw_init,
.sw_fini = ih_v6_1_sw_fini,
.hw_init = ih_v6_1_hw_init,
@@ -781,8 +774,6 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
.set_clockgating_state = ih_v6_1_set_clockgating_state,
.set_powergating_state = ih_v6_1_set_powergating_state,
.get_clockgating_state = ih_v6_1_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index 6852081fcff2..40a3530e0453 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -528,19 +528,19 @@ static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs;
}
-static int ih_v7_0_early_init(void *handle)
+static int ih_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v7_0_set_interrupt_funcs(adev);
ih_v7_0_set_self_irq_funcs(adev);
return 0;
}
-static int ih_v7_0_sw_init(void *handle)
+static int ih_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0,
@@ -583,19 +583,19 @@ static int ih_v7_0_sw_init(void *handle)
return r;
}
-static int ih_v7_0_sw_fini(void *handle)
+static int ih_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int ih_v7_0_hw_init(void *handle)
+static int ih_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = ih_v7_0_irq_init(adev);
if (r)
@@ -604,42 +604,36 @@ static int ih_v7_0_hw_init(void *handle)
return 0;
}
-static int ih_v7_0_hw_fini(void *handle)
+static int ih_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- ih_v7_0_irq_disable(adev);
+ ih_v7_0_irq_disable(ip_block->adev);
return 0;
}
-static int ih_v7_0_suspend(void *handle)
+static int ih_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v7_0_hw_fini(adev);
+ return ih_v7_0_hw_fini(ip_block);
}
-static int ih_v7_0_resume(void *handle)
+static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return ih_v7_0_hw_init(adev);
+ return ih_v7_0_hw_init(ip_block);
}
-static bool ih_v7_0_is_idle(void *handle)
+static bool ih_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int ih_v7_0_wait_for_idle(void *handle)
+static int ih_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int ih_v7_0_soft_reset(void *handle)
+static int ih_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -670,10 +664,10 @@ static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev,
return;
}
-static int ih_v7_0_set_clockgating_state(void *handle,
+static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
ih_v7_0_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -733,10 +727,10 @@ static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev,
WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl);
}
-static int ih_v7_0_set_powergating_state(void *handle,
+static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_PG_STATE_GATE);
if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG)
@@ -745,9 +739,9 @@ static int ih_v7_0_set_powergating_state(void *handle,
return 0;
}
-static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags)
+static void ih_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
@@ -758,7 +752,6 @@ static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
.name = "ih_v7_0",
.early_init = ih_v7_0_early_init,
- .late_init = NULL,
.sw_init = ih_v7_0_sw_init,
.sw_fini = ih_v7_0_sw_fini,
.hw_init = ih_v7_0_hw_init,
@@ -771,8 +764,6 @@ static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
.set_clockgating_state = ih_v7_0_set_clockgating_state,
.set_powergating_state = ih_v7_0_set_powergating_state,
.get_clockgating_state = ih_v7_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v7_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index d4f72e47ae9e..cc626036ed9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -32,6 +32,7 @@
#include "gc/gc_11_0_0_sh_mask.h"
MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
@@ -39,6 +40,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
@@ -50,7 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
+ if (amdgpu_is_kicker_fw(adev))
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu_kicker.bin", ucode_prefix);
+ else
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
index 1341f0292031..df898dbb746e 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c
@@ -47,7 +47,8 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
- err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix);
+ err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED,
+ "amdgpu/%s_imu.bin", ucode_prefix);
if (err)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
index 964c29ef25dc..0027a639c7e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c
@@ -50,26 +50,29 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
isp_base = adev->rmmio_base;
- isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL);
+ isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
if (!isp->isp_cell) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd cell alloc failed\n", __func__);
goto failure;
}
- num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC;
+ num_res = MAX_ISP410_MEM_RES + MAX_ISP410_INT_SRC;
isp->isp_res = kcalloc(num_res, sizeof(struct resource),
GFP_KERNEL);
if (!isp->isp_res) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd res alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
goto failure;
}
isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
if (!isp->isp_pdata) {
r = -ENOMEM;
- DRM_ERROR("%s: isp platform data alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp platform data alloc failed\n", __func__);
goto failure;
}
@@ -88,14 +91,7 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET;
isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE;
- isp->isp_res[2].name = "isp_gpio_sensor0_reg";
- isp->isp_res[2].flags = IORESOURCE_MEM;
- isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET;
- isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET +
- ISP410_GPIO_SENSOR0_SIZE;
-
- for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0;
- idx < num_res; idx++, int_idx++) {
+ for (idx = MAX_ISP410_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
isp->isp_res[idx].name = "isp_4_1_0_irq";
isp->isp_res[idx].flags = IORESOURCE_IRQ;
isp->isp_res[idx].start =
@@ -110,11 +106,12 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
isp->isp_cell[0].platform_data = isp->isp_pdata;
isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
- isp->isp_i2c_res = kcalloc(1, sizeof(struct resource),
- GFP_KERNEL);
+ /* initialize isp i2c platform data */
+ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
if (!isp->isp_i2c_res) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd res alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
goto failure;
}
@@ -129,9 +126,31 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp)
isp->isp_cell[1].platform_data = isp->isp_pdata;
isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
- r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
+ /* initialize isp gpiochip platform data */
+ isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_gpio_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp gpio res alloc failed\n", __func__);
+ goto failure;
+ }
+
+ isp->isp_gpio_res[0].name = "isp_gpio_reg";
+ isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+ isp->isp_gpio_res[0].start = isp_base + ISP410_GPIO_SENSOR_OFFSET;
+ isp->isp_gpio_res[0].end = isp_base + ISP410_GPIO_SENSOR_OFFSET +
+ ISP410_GPIO_SENSOR_SIZE;
+
+ isp->isp_cell[2].name = "amdisp-pinctrl";
+ isp->isp_cell[2].num_resources = 1;
+ isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+ isp->isp_cell[2].platform_data = isp->isp_pdata;
+ isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+ r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3);
if (r) {
- DRM_ERROR("%s: add mfd hotplug device failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: add mfd hotplug device failed\n", __func__);
goto failure;
}
@@ -143,6 +162,7 @@ failure:
kfree(isp->isp_res);
kfree(isp->isp_cell);
kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
return r;
}
@@ -155,6 +175,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp)
kfree(isp->isp_cell);
kfree(isp->isp_pdata);
kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
index 7db24c0f1080..4d239198edd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h
@@ -42,8 +42,8 @@
#define ISP410_I2C0_OFFSET 0x66400
#define ISP410_I2C0_SIZE 0x100
-#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C
-#define ISP410_GPIO_SENSOR0_SIZE 0x4
+#define ISP410_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP410_GPIO_SENSOR_SIZE 0x54
void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp);
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
index b56f27295468..574880d67009 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -25,6 +25,7 @@
*
*/
+#include <linux/gpio/machine.h>
#include "amdgpu.h"
#include "isp_v4_1_1.h"
@@ -39,38 +40,71 @@ static const unsigned int isp_4_1_1_int_srcid[MAX_ISP411_INT_SRC] = {
ISP_4_1__SRCID__ISP_RINGBUFFER_WPT16
};
+static struct gpiod_lookup_table isp_gpio_table = {
+ .dev_id = "amd_isp_capture",
+ .table = {
+ GPIO_LOOKUP("AMDI0030:00", 85, "enable_isp", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
+static struct gpiod_lookup_table isp_sensor_gpio_table = {
+ .dev_id = "i2c-ov05c10",
+ .table = {
+ GPIO_LOOKUP("amdisp-pinctrl", 0, "enable", GPIO_ACTIVE_HIGH),
+ { }
+ },
+};
+
static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
{
struct amdgpu_device *adev = isp->adev;
int idx, int_idx, num_res, r;
+ u8 isp_dev_hid[ACPI_ID_LEN];
u64 isp_base;
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL;
+ r = amdgpu_acpi_get_isp4_dev_hid(&isp_dev_hid);
+ if (r) {
+ drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r);
+ /* allow GPU init to progress */
+ return 0;
+ }
+
+ /* add GPIO resources required for OMNI5C10 sensor */
+ if (!strcmp("OMNI5C10", isp_dev_hid)) {
+ gpiod_add_lookup_table(&isp_gpio_table);
+ gpiod_add_lookup_table(&isp_sensor_gpio_table);
+ }
+
isp_base = adev->rmmio_base;
- isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL);
+ isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL);
if (!isp->isp_cell) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd cell alloc failed\n", __func__);
goto failure;
}
- num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC;
+ num_res = MAX_ISP411_MEM_RES + MAX_ISP411_INT_SRC;
isp->isp_res = kcalloc(num_res, sizeof(struct resource),
GFP_KERNEL);
if (!isp->isp_res) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd res alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
goto failure;
}
isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL);
if (!isp->isp_pdata) {
r = -ENOMEM;
- DRM_ERROR("%s: isp platform data alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp platform data alloc failed\n", __func__);
goto failure;
}
@@ -89,14 +123,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET;
isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE;
- isp->isp_res[2].name = "isp_4_1_1_sensor0_reg";
- isp->isp_res[2].flags = IORESOURCE_MEM;
- isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET;
- isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET +
- ISP411_GPIO_SENSOR0_SIZE;
-
- for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0;
- idx < num_res; idx++, int_idx++) {
+ for (idx = MAX_ISP411_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) {
isp->isp_res[idx].name = "isp_4_1_1_irq";
isp->isp_res[idx].flags = IORESOURCE_IRQ;
isp->isp_res[idx].start =
@@ -111,10 +138,12 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
isp->isp_cell[0].platform_data = isp->isp_pdata;
isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data);
+ /* initialize isp i2c platform data */
isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
if (!isp->isp_i2c_res) {
r = -ENOMEM;
- DRM_ERROR("%s: isp mfd res alloc failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: isp mfd res alloc failed\n", __func__);
goto failure;
}
@@ -129,9 +158,31 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp)
isp->isp_cell[1].platform_data = isp->isp_pdata;
isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data);
- r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2);
+ /* initialize isp gpiochip platform data */
+ isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL);
+ if (!isp->isp_gpio_res) {
+ r = -ENOMEM;
+ drm_err(&adev->ddev,
+ "%s: isp gpio res alloc failed\n", __func__);
+ goto failure;
+ }
+
+ isp->isp_gpio_res[0].name = "isp_gpio_reg";
+ isp->isp_gpio_res[0].flags = IORESOURCE_MEM;
+ isp->isp_gpio_res[0].start = isp_base + ISP411_GPIO_SENSOR_OFFSET;
+ isp->isp_gpio_res[0].end = isp_base + ISP411_GPIO_SENSOR_OFFSET +
+ ISP411_GPIO_SENSOR_SIZE;
+
+ isp->isp_cell[2].name = "amdisp-pinctrl";
+ isp->isp_cell[2].num_resources = 1;
+ isp->isp_cell[2].resources = &isp->isp_gpio_res[0];
+ isp->isp_cell[2].platform_data = isp->isp_pdata;
+ isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data);
+
+ r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3);
if (r) {
- DRM_ERROR("%s: add mfd hotplug device failed\n", __func__);
+ drm_err(&adev->ddev,
+ "%s: add mfd hotplug device failed\n", __func__);
goto failure;
}
@@ -143,6 +194,7 @@ failure:
kfree(isp->isp_res);
kfree(isp->isp_cell);
kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
return r;
}
@@ -155,6 +207,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp)
kfree(isp->isp_cell);
kfree(isp->isp_pdata);
kfree(isp->isp_i2c_res);
+ kfree(isp->isp_gpio_res);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
index 40887ddeb08c..fe45d70d87f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h
@@ -33,7 +33,6 @@
#include "ivsrcid/isp/irqsrcs_isp_4_1.h"
#define MAX_ISP411_MEM_RES 2
-#define MAX_ISP411_SENSOR_RES 1
#define MAX_ISP411_INT_SRC 8
#define ISP411_PHY0_OFFSET 0x66700
@@ -42,8 +41,8 @@
#define ISP411_I2C0_OFFSET 0x66400
#define ISP411_I2C0_SIZE 0x100
-#define ISP411_GPIO_SENSOR0_OFFSET 0x6613C
-#define ISP411_GPIO_SENSOR0_SIZE 0x4
+#define ISP411_GPIO_SENSOR_OFFSET 0x6613C
+#define ISP411_GPIO_SENSOR_SIZE 0x54
void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp);
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 6e0e88076224..9e428e669ada 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -458,13 +458,13 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev,
/**
* jpeg_v1_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-int jpeg_v1_0_early_init(void *handle)
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
adev->jpeg.num_jpeg_rings = 1;
@@ -478,12 +478,12 @@ int jpeg_v1_0_early_init(void *handle)
/**
* jpeg_v1_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-int jpeg_v1_0_sw_init(void *handle)
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -509,13 +509,13 @@ int jpeg_v1_0_sw_init(void *handle)
/**
* jpeg_v1_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG free up sw allocation
*/
-void jpeg_v1_0_sw_fini(void *handle)
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_ring_fini(adev->jpeg.inst->ring_dec);
}
@@ -604,15 +604,15 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev)
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
int cnt = 0;
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec))
DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n");
- for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) {
+ for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) {
if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt]))
DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
index 9654d22e0376..097328635083 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -24,9 +24,9 @@
#ifndef __JPEG_V1_0_H__
#define __JPEG_V1_0_H__
-int jpeg_v1_0_early_init(void *handle);
-int jpeg_v1_0_sw_init(void *handle);
-void jpeg_v1_0_sw_fini(void *handle);
+int jpeg_v1_0_early_init(struct amdgpu_ip_block *ip_block);
+int jpeg_v1_0_sw_init(struct amdgpu_ip_block *ip_block);
+void jpeg_v1_0_sw_fini(struct amdgpu_ip_block *ip_block);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
#define JPEG_V1_REG_RANGE_START 0x8000
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 41c0f8750dc1..4cde8a8bcc83 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -33,21 +33,37 @@
#include "vcn/vcn_2_0_0_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v2_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_0_early_init(void *handle)
+static int jpeg_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
adev->jpeg.num_jpeg_rings = 1;
@@ -61,13 +77,13 @@ static int jpeg_v2_0_early_init(void *handle)
/**
* jpeg_v2_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_0_sw_init(void *handle)
+static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -98,25 +114,34 @@ static int jpeg_v2_0_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_0, ARRAY_SIZE(jpeg_reg_list_2_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_0_sw_fini(void *handle)
+static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -125,12 +150,12 @@ static int jpeg_v2_0_sw_fini(void *handle)
/**
* jpeg_v2_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_0_hw_init(void *handle)
+static int jpeg_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
@@ -142,19 +167,19 @@ static int jpeg_v2_0_hw_init(void *handle)
/**
* jpeg_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_0_hw_fini(void *handle)
+static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -162,20 +187,19 @@ static int jpeg_v2_0_hw_fini(void *handle)
/**
* jpeg_v2_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_0_suspend(void *handle)
+static int jpeg_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_0_hw_fini(adev);
+ r = jpeg_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -183,20 +207,19 @@ static int jpeg_v2_0_suspend(void *handle)
/**
* jpeg_v2_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_0_resume(void *handle)
+static int jpeg_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_0_hw_init(adev);
+ r = jpeg_v2_0_hw_init(ip_block);
return r;
}
@@ -657,18 +680,18 @@ void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v2_0_is_idle(void *handle)
+static bool jpeg_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v2_0_wait_for_idle(void *handle)
+static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS, UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
@@ -677,14 +700,14 @@ static int jpeg_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int jpeg_v2_0_set_clockgating_state(void *handle,
+static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
- if (!jpeg_v2_0_is_idle(handle))
+ if (!jpeg_v2_0_is_idle(ip_block))
return -EBUSY;
jpeg_v2_0_enable_clock_gating(adev);
} else {
@@ -694,10 +717,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_0_set_powergating_state(void *handle,
+static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -741,10 +764,16 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ jpeg_v2_0_stop(ring->adev);
+ jpeg_v2_0_start(ring->adev);
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.name = "jpeg_v2_0",
.early_init = jpeg_v2_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_0_sw_init,
.sw_fini = jpeg_v2_0_sw_fini,
.hw_init = jpeg_v2_0_hw_init,
@@ -753,14 +782,10 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.resume = jpeg_v2_0_resume,
.is_idle = jpeg_v2_0_is_idle,
.wait_for_idle = jpeg_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
.set_powergating_state = jpeg_v2_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
@@ -791,6 +816,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_0_ring_reset,
};
static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index eedb9a829d95..8b39e114f3be 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -36,9 +36,25 @@
#define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
@@ -50,13 +66,13 @@ static int amdgpu_ih_clientid_jpeg[] = {
/**
* jpeg_v2_5_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v2_5_early_init(void *handle)
+static int jpeg_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
int i;
@@ -81,15 +97,15 @@ static int jpeg_v2_5_early_init(void *handle)
/**
* jpeg_v2_5_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v2_5_sw_init(void *handle)
+static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -147,25 +163,34 @@ static int jpeg_v2_5_sw_init(void *handle)
if (r)
return r;
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_5, ARRAY_SIZE(jpeg_reg_list_2_5));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v2_5_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v2_5_sw_fini(void *handle)
+static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -174,12 +199,12 @@ static int jpeg_v2_5_sw_fini(void *handle)
/**
* jpeg_v2_5_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v2_5_hw_init(void *handle)
+static int jpeg_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -202,16 +227,16 @@ static int jpeg_v2_5_hw_init(void *handle)
/**
* jpeg_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v2_5_hw_fini(void *handle)
+static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -219,7 +244,7 @@ static int jpeg_v2_5_hw_fini(void *handle)
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
- jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0);
@@ -231,20 +256,19 @@ static int jpeg_v2_5_hw_fini(void *handle)
/**
* jpeg_v2_5_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v2_5_suspend(void *handle)
+static int jpeg_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v2_5_hw_fini(adev);
+ r = jpeg_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -252,20 +276,19 @@ static int jpeg_v2_5_suspend(void *handle)
/**
* jpeg_v2_5_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v2_5_resume(void *handle)
+static int jpeg_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v2_5_hw_init(adev);
+ r = jpeg_v2_5_hw_init(ip_block);
return r;
}
@@ -312,6 +335,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data);
}
+static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i)
+{
+ struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec;
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v2_5_disable_clock_gating(adev, i);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
+}
+
/**
* jpeg_v2_5_start - start JPEG block
*
@@ -321,52 +382,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
*/
static int jpeg_v2_5_start(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
+ jpeg_v2_5_start_inst(adev, i);
- ring = adev->jpeg.inst[i].ring_dec;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v2_5_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR);
}
return 0;
}
+static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i)
+{
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v2_5_enable_clock_gating(adev, i);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
/**
* jpeg_v2_5_stop - stop JPEG block
*
@@ -381,18 +423,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
continue;
-
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v2_5_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+ jpeg_v2_5_stop_inst(adev, i);
}
return 0;
@@ -484,9 +515,9 @@ static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14)));
}
-static bool jpeg_v2_5_is_idle(void *handle)
+static bool jpeg_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -501,9 +532,9 @@ static bool jpeg_v2_5_is_idle(void *handle)
return ret;
}
-static int jpeg_v2_5_wait_for_idle(void *handle)
+static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -520,10 +551,10 @@ static int jpeg_v2_5_wait_for_idle(void *handle)
return 0;
}
-static int jpeg_v2_5_set_clockgating_state(void *handle,
+static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -532,7 +563,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v2_5_is_idle(handle))
+ if (!jpeg_v2_5_is_idle(ip_block))
return -EBUSY;
jpeg_v2_5_enable_clock_gating(adev, i);
} else {
@@ -543,10 +574,10 @@ static int jpeg_v2_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v2_5_set_powergating_state(void *handle,
+static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -612,10 +643,16 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ jpeg_v2_5_stop_inst(ring->adev, ring->me);
+ jpeg_v2_5_start_inst(ring->adev, ring->me);
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.name = "jpeg_v2_5",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -624,20 +661,15 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.name = "jpeg_v2_6",
.early_init = jpeg_v2_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v2_5_sw_init,
.sw_fini = jpeg_v2_5_sw_fini,
.hw_init = jpeg_v2_5_hw_init,
@@ -646,14 +678,10 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.resume = jpeg_v2_5_resume,
.is_idle = jpeg_v2_5_is_idle,
.wait_for_idle = jpeg_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
@@ -684,6 +712,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
@@ -714,6 +743,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v2_5_ring_reset,
};
static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index b1e7fd25afbc..2f8510c2986b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -34,21 +34,37 @@
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_3_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v3_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v3_0_early_init(void *handle)
+static int jpeg_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 harvest;
@@ -75,13 +91,13 @@ static int jpeg_v3_0_early_init(void *handle)
/**
* jpeg_v3_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v3_0_sw_init(void *handle)
+static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -112,25 +128,34 @@ static int jpeg_v3_0_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH);
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_3_0, ARRAY_SIZE(jpeg_reg_list_3_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v3_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v3_0_sw_fini(void *handle)
+static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -139,12 +164,12 @@ static int jpeg_v3_0_sw_fini(void *handle)
/**
* jpeg_v3_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v3_0_hw_init(void *handle)
+static int jpeg_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
@@ -156,19 +181,19 @@ static int jpeg_v3_0_hw_init(void *handle)
/**
* jpeg_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v3_0_hw_fini(void *handle)
+static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
- jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -176,20 +201,19 @@ static int jpeg_v3_0_hw_fini(void *handle)
/**
* jpeg_v3_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v3_0_suspend(void *handle)
+static int jpeg_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v3_0_hw_fini(adev);
+ r = jpeg_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -197,20 +221,19 @@ static int jpeg_v3_0_suspend(void *handle)
/**
* jpeg_v3_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v3_0_resume(void *handle)
+static int jpeg_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v3_0_hw_init(adev);
+ r = jpeg_v3_0_hw_init(ip_block);
return r;
}
@@ -447,9 +470,9 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v3_0_is_idle(void *handle)
+static bool jpeg_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) &
@@ -459,23 +482,23 @@ static bool jpeg_v3_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v3_0_wait_for_idle(void *handle)
+static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, mmUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v3_0_set_clockgating_state(void *handle,
+static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v3_0_is_idle(handle))
+ if (!jpeg_v3_0_is_idle(ip_block))
return -EBUSY;
jpeg_v3_0_enable_clock_gating(adev);
} else {
@@ -485,10 +508,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v3_0_set_powergating_state(void *handle,
+static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if(state == adev->jpeg.cur_state)
@@ -532,10 +555,16 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ jpeg_v3_0_stop(ring->adev);
+ jpeg_v3_0_start(ring->adev);
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.name = "jpeg_v3_0",
.early_init = jpeg_v3_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v3_0_sw_init,
.sw_fini = jpeg_v3_0_sw_fini,
.hw_init = jpeg_v3_0_hw_init,
@@ -544,14 +573,10 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.resume = jpeg_v3_0_resume,
.is_idle = jpeg_v3_0_is_idle,
.wait_for_idle = jpeg_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
@@ -582,6 +607,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v3_0_ring_reset,
};
static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 6c5c1a68a9b7..f17ec5414fd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -36,25 +36,40 @@
#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev);
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
-
static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
/**
* jpeg_v4_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v4_0_early_init(void *handle)
+static int jpeg_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
@@ -70,13 +85,13 @@ static int jpeg_v4_0_early_init(void *handle)
/**
* jpeg_v4_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v4_0_sw_init(void *handle)
+static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
@@ -124,25 +139,33 @@ static int jpeg_v4_0_sw_init(void *handle)
if (r)
return r;
- return 0;
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0, ARRAY_SIZE(jpeg_reg_list_4_0));
+ if (r)
+ return r;
+
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+
+ return r;
}
/**
* jpeg_v4_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v4_0_sw_fini(void *handle)
+static int jpeg_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -151,12 +174,12 @@ static int jpeg_v4_0_sw_fini(void *handle)
/**
* jpeg_v4_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v4_0_hw_init(void *handle)
+static int jpeg_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
@@ -187,19 +210,19 @@ static int jpeg_v4_0_hw_init(void *handle)
/**
* jpeg_v4_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v4_0_hw_fini(void *handle)
+static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
- jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
@@ -210,20 +233,19 @@ static int jpeg_v4_0_hw_fini(void *handle)
/**
* jpeg_v4_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v4_0_suspend(void *handle)
+static int jpeg_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v4_0_hw_fini(adev);
+ r = jpeg_v4_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -231,20 +253,19 @@ static int jpeg_v4_0_suspend(void *handle)
/**
* jpeg_v4_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v4_0_resume(void *handle)
+static int jpeg_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v4_0_hw_init(adev);
+ r = jpeg_v4_0_hw_init(ip_block);
return r;
}
@@ -609,9 +630,9 @@ static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v4_0_is_idle(void *handle)
+static bool jpeg_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
@@ -621,23 +642,23 @@ static bool jpeg_v4_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v4_0_wait_for_idle(void *handle)
+static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v4_0_set_clockgating_state(void *handle,
+static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
if (enable) {
- if (!jpeg_v4_0_is_idle(handle))
+ if (!jpeg_v4_0_is_idle(ip_block))
return -EBUSY;
jpeg_v4_0_enable_clock_gating(adev);
} else {
@@ -647,10 +668,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_set_powergating_state(void *handle,
+static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
@@ -699,10 +720,19 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EINVAL;
+
+ jpeg_v4_0_stop(ring->adev);
+ jpeg_v4_0_start(ring->adev);
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.name = "jpeg_v4_0",
.early_init = jpeg_v4_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v4_0_sw_init,
.sw_fini = jpeg_v4_0_sw_fini,
.hw_init = jpeg_v4_0_hw_init,
@@ -711,14 +741,10 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.resume = jpeg_v4_0_resume,
.is_idle = jpeg_v4_0_is_idle,
.wait_for_idle = jpeg_v4_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
@@ -749,6 +775,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_ring_reset,
};
static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 86958cb2c2ab..79e342d5ab28 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -43,7 +43,7 @@ enum jpeg_engin_status {
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring);
@@ -59,24 +59,67 @@ static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG7_DECODE
};
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+};
+
static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
{
- return amdgpu_sriov_vf(adev) ||
- (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4));
+ return (adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)) == 0;
+}
+
+static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe)
+{
+ if (pipe)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return 0;
}
/**
* jpeg_v4_0_3_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v4_0_3_early_init(void *handle)
+static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
jpeg_v4_0_3_set_dec_ring_funcs(adev);
jpeg_v4_0_3_set_irq_funcs(adev);
@@ -88,13 +131,13 @@ static int jpeg_v4_0_3_early_init(void *handle)
/**
* jpeg_v4_0_3_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v4_0_3_sw_init(void *handle)
+static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
@@ -106,6 +149,18 @@ static int jpeg_v4_0_3_sw_init(void *handle)
return r;
}
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
r = amdgpu_jpeg_sw_init(adev);
if (r)
return r;
@@ -144,10 +199,8 @@ static int jpeg_v4_0_3_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[j] =
regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
adev->jpeg.inst[i].external.jpeg_pitch[j] =
- SOC15_REG_OFFSET1(
- JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_SCRATCH0,
- (j ? (0x40 * j - 0xc80) : 0));
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0,
+ jpeg_v4_0_3_core_reg_offset(j));
}
}
@@ -159,25 +212,39 @@ static int jpeg_v4_0_3_sw_init(void *handle)
}
}
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3));
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+ }
+
return 0;
}
/**
* jpeg_v4_0_3_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v4_0_3_sw_fini(void *handle)
+static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -299,12 +366,12 @@ static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
/**
* jpeg_v4_0_3_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v4_0_3_hw_init(void *handle)
+static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r, jpeg_inst;
@@ -313,7 +380,7 @@ static int jpeg_v4_0_3_hw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
ring = &adev->jpeg.inst[i].ring_dec[j];
ring->wptr = 0;
@@ -323,6 +390,11 @@ static int jpeg_v4_0_3_hw_init(void *handle)
}
}
} else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
jpeg_inst = GET_INST(JPEG, i);
@@ -358,42 +430,44 @@ static int jpeg_v4_0_3_hw_init(void *handle)
/**
* jpeg_v4_0_3_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v4_0_3_hw_fini(void *handle)
+static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
- ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
return ret;
}
/**
* jpeg_v4_0_3_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v4_0_3_suspend(void *handle)
+static int jpeg_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v4_0_3_hw_fini(adev);
+ r = jpeg_v4_0_3_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -401,20 +475,19 @@ static int jpeg_v4_0_3_suspend(void *handle)
/**
* jpeg_v4_0_3_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v4_0_3_resume(void *handle)
+static int jpeg_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v4_0_3_hw_init(adev);
+ r = jpeg_v4_0_3_hw_init(ip_block);
return r;
}
@@ -469,6 +542,75 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst
WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data);
}
+static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+
+ WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
+ 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
+ SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDJ_PWR_ON <<
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* JPEG disable CGC */
+ jpeg_v4_0_3_disable_clock_gating(adev, inst);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe,
+ ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe));
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC0_UVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
/**
* jpeg_v4_0_3_start - start JPEG block
*
@@ -479,84 +621,36 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst
static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- int i, j, jpeg_inst;
+ int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- jpeg_inst = GET_INST(JPEG, i);
-
- WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
- 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
- SOC15_WAIT_ON_RREG(
- JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
- UVD_PGFSM_STATUS__UVDJ_PWR_ON
- << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
-
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regUVD_JPEG_POWER_STATUS),
- 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- jpeg_v4_0_3_disable_clock_gating(adev, i);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
+ jpeg_v4_0_3_start_inst(adev, i);
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
ring = &adev->jpeg.inst[i].ring_dec[j];
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC0_MASK << j,
- ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j));
-
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_VMID,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_CNTL,
- reg_offset,
- (0x00000001L | 0x00000002L));
- WREG32_SOC15_OFFSET(
- JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- reg_offset, lower_32_bits(ring->gpu_addr));
- WREG32_SOC15_OFFSET(
- JPEG, jpeg_inst,
- regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- reg_offset, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_RPTR,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- reg_offset, 0);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_CNTL,
- reg_offset, 0x00000002L);
- WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
- regUVD_JRBC0_UVD_JRBC_RB_SIZE,
- reg_offset, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15_OFFSET(
- JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- reg_offset);
+ jpeg_v4_0_3_start_jrbc(ring);
}
}
return 0;
}
+static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst)
+{
+ int jpeg_inst = GET_INST(JPEG, inst);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ jpeg_v4_0_3_enable_clock_gating(adev, inst);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+}
+
/**
* jpeg_v4_0_3_stop - stop JPEG block
*
@@ -566,31 +660,10 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev)
*/
static int jpeg_v4_0_3_stop(struct amdgpu_device *adev)
{
- int i, jpeg_inst;
+ int i;
- for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
- jpeg_inst = GET_INST(JPEG, i);
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- jpeg_v4_0_3_enable_clock_gating(adev, i);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst,
- regUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG,
- 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT);
- SOC15_WAIT_ON_RREG(
- JPEG, jpeg_inst, regUVD_PGFSM_STATUS,
- UVD_PGFSM_STATUS__UVDJ_PWR_OFF
- << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT,
- UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK);
- }
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v4_0_3_stop_inst(adev, i);
return 0;
}
@@ -606,9 +679,8 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
- ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
}
/**
@@ -624,14 +696,12 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, ring->me),
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0);
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe));
}
-static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
/* JPEG engine access for HDP flush doesn't work when RRMT is enabled.
* This is a workaround to avoid any HDP flush through JPEG ring.
@@ -653,10 +723,8 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring)
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
- regUVD_JRBC0_UVD_JRBC_RB_WPTR,
- (ring->pipe ? (0x40 * ring->pipe - 0xc80) :
- 0),
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR,
+ jpeg_v4_0_3_core_reg_offset(ring->pipe),
lower_32_bits(ring->wptr));
}
}
@@ -674,11 +742,12 @@ void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */
- }
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x80004000);
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80004000);
+ }
}
/**
@@ -694,11 +763,12 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x62a04);
- }
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x00004000);
+ amdgpu_ring_write(ring,
+ PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0,
+ 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00004000);
+ }
}
/**
@@ -743,14 +813,6 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x3fbc);
-
- amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
- 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x1);
-
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
amdgpu_ring_write(ring, 0);
@@ -907,21 +969,17 @@ void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool jpeg_v4_0_3_is_idle(void *handle)
+static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool ret = false;
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
- ret &= ((RREG32_SOC15_OFFSET(
- JPEG, GET_INST(JPEG, i),
- regUVD_JRBC0_UVD_JRBC_STATUS,
- reg_offset) &
- UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) &
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
}
@@ -929,36 +987,33 @@ static bool jpeg_v4_0_3_is_idle(void *handle)
return ret;
}
-static int jpeg_v4_0_3_wait_for_idle(void *handle)
+static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
int i, j;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
- unsigned int reg_offset = (j?(0x40 * j - 0xc80):0);
-
- ret &= SOC15_WAIT_ON_RREG_OFFSET(
- JPEG, GET_INST(JPEG, i),
- regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset,
+ ret &= (SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j),
UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
- UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK));
}
}
return ret;
}
-static int jpeg_v4_0_3_set_clockgating_state(void *handle,
+static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (enable) {
- if (!jpeg_v4_0_3_is_idle(handle))
+ if (!jpeg_v4_0_3_is_idle(ip_block))
return -EBUSY;
jpeg_v4_0_3_enable_clock_gating(adev, i);
} else {
@@ -968,10 +1023,10 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_3_set_powergating_state(void *handle,
+static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
@@ -1001,6 +1056,14 @@ static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int jpeg_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1055,10 +1118,44 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe);
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EOPNOTSUPP;
+
+ jpeg_v4_0_3_core_stall_reset(ring);
+ jpeg_v4_0_3_start_jrbc(ring);
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
.name = "jpeg_v4_0_3",
.early_init = jpeg_v4_0_3_early_init,
- .late_init = NULL,
.sw_init = jpeg_v4_0_3_sw_init,
.sw_fini = jpeg_v4_0_3_sw_fini,
.hw_init = jpeg_v4_0_3_hw_init,
@@ -1067,14 +1164,10 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
.resume = jpeg_v4_0_3_resume,
.is_idle = jpeg_v4_0_3_is_idle,
.wait_for_idle = jpeg_v4_0_3_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
@@ -1088,7 +1181,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */
- 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
+ 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */
8 + 16,
.emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */
.emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
@@ -1106,6 +1199,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v4_0_3_ring_reset,
};
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
@@ -1129,6 +1223,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = {
.process = jpeg_v4_0_3_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_ras_irq_funcs = {
+ .set = jpeg_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1137,6 +1236,9 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
}
adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_3_ras_irq_funcs;
}
const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = {
@@ -1233,14 +1335,147 @@ static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
jpeg_v4_0_3_inst_reset_ras_error_count(adev, i);
}
+static uint32_t jpeg_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V4_0_3_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V4_0_3_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v4_0_3_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v4_0_3_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = {
.query_ras_error_count = jpeg_v4_0_3_query_ras_error_count,
.reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count,
+ .query_poison_status = jpeg_v4_0_3_query_ras_poison_status,
};
+static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int jpeg_v4_0_3_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v4_0_3_err_codes,
+ ARRAY_SIZE(jpeg_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v4_0_3_aca_bank_ops,
+};
+
+static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->jpeg.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = {
.ras_block = {
.hw_ops = &jpeg_v4_0_3_ras_hw_ops,
+ .ras_late_init = jpeg_v4_0_3_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
index 747a3e5f6856..2e110d04af84 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -46,6 +46,13 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+enum amdgpu_jpeg_v4_0_3_sub_block {
+ AMDGPU_JPEG_V4_0_3_JPEG0 = 0,
+ AMDGPU_JPEG_V4_0_3_JPEG1,
+
+ AMDGPU_JPEG_V4_0_3_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
@@ -56,6 +63,7 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
unsigned int flags);
void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr);
+void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index 44eeed445ea9..974030a5c03c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -46,11 +46,26 @@
#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160
#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_5[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v4_0_5_set_powergating_state(void *handle,
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
-
static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring);
static int amdgpu_ih_clientid_jpeg[] = {
@@ -58,16 +73,18 @@ static int amdgpu_ih_clientid_jpeg[] = {
SOC15_IH_CLIENTID_VCN1
};
+
+
/**
* jpeg_v4_0_5_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v4_0_5_early_init(void *handle)
+static int jpeg_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
@@ -94,13 +111,13 @@ static int jpeg_v4_0_5_early_init(void *handle)
/**
* jpeg_v4_0_5_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v4_0_5_sw_init(void *handle)
+static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r, i;
@@ -153,25 +170,37 @@ static int jpeg_v4_0_5_sw_init(void *handle)
adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH);
}
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_5, ARRAY_SIZE(jpeg_reg_list_4_0_5));
+ if (r)
+ return r;
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* jpeg_v4_0_5_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v4_0_5_sw_fini(void *handle)
+static int jpeg_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -180,12 +209,12 @@ static int jpeg_v4_0_5_sw_fini(void *handle)
/**
* jpeg_v4_0_5_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v4_0_5_hw_init(void *handle)
+static int jpeg_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r = 0;
@@ -210,16 +239,16 @@ static int jpeg_v4_0_5_hw_init(void *handle)
/**
* jpeg_v4_0_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v4_0_5_hw_fini(void *handle)
+static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
if (adev->jpeg.harvest_config & (1 << i))
@@ -228,7 +257,7 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
if (!amdgpu_sriov_vf(adev)) {
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS))
- jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
}
}
return 0;
@@ -237,20 +266,19 @@ static int jpeg_v4_0_5_hw_fini(void *handle)
/**
* jpeg_v4_0_5_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v4_0_5_suspend(void *handle)
+static int jpeg_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v4_0_5_hw_fini(adev);
+ r = jpeg_v4_0_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -258,20 +286,19 @@ static int jpeg_v4_0_5_suspend(void *handle)
/**
* jpeg_v4_0_5_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v4_0_5_resume(void *handle)
+static int jpeg_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v4_0_5_hw_init(adev);
+ r = jpeg_v4_0_5_hw_init(ip_block);
return r;
}
@@ -621,9 +648,9 @@ static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v4_0_5_is_idle(void *handle)
+static bool jpeg_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -637,9 +664,9 @@ static bool jpeg_v4_0_5_is_idle(void *handle)
return ret;
}
-static int jpeg_v4_0_5_wait_for_idle(void *handle)
+static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
@@ -654,10 +681,10 @@ static int jpeg_v4_0_5_wait_for_idle(void *handle)
return 0;
}
-static int jpeg_v4_0_5_set_clockgating_state(void *handle,
+static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
@@ -666,7 +693,7 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle,
continue;
if (enable) {
- if (!jpeg_v4_0_5_is_idle(handle))
+ if (!jpeg_v4_0_5_is_idle(ip_block))
return -EBUSY;
jpeg_v4_0_5_enable_clock_gating(adev, i);
@@ -678,10 +705,10 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v4_0_5_set_powergating_state(void *handle,
+static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (amdgpu_sriov_vf(adev)) {
@@ -743,7 +770,6 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev,
static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
.name = "jpeg_v4_0_5",
.early_init = jpeg_v4_0_5_early_init,
- .late_init = NULL,
.sw_init = jpeg_v4_0_5_sw_init,
.sw_fini = jpeg_v4_0_5_sw_fini,
.hw_init = jpeg_v4_0_5_hw_init,
@@ -752,14 +778,10 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
.resume = jpeg_v4_0_5_resume,
.is_idle = jpeg_v4_0_5_is_idle,
.wait_for_idle = jpeg_v4_0_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_5_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_5_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index d662aa841f97..31d213ccbe0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -31,24 +31,40 @@
#include "vcn/vcn_5_0_0_offset.h"
#include "vcn/vcn_5_0_0_sh_mask.h"
-#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
#include "jpeg_v5_0_0.h"
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+};
+
static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
-static int jpeg_v5_0_0_set_powergating_state(void *handle,
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state);
/**
* jpeg_v5_0_0_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int jpeg_v5_0_0_early_init(void *handle)
+static int jpeg_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->jpeg.num_jpeg_inst = 1;
adev->jpeg.num_jpeg_rings = 1;
@@ -62,19 +78,19 @@ static int jpeg_v5_0_0_early_init(void *handle)
/**
* jpeg_v5_0_0_sw_init - sw init for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int jpeg_v5_0_0_sw_init(void *handle)
+static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r;
/* JPEG TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
- VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
+ VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq);
if (r)
return r;
@@ -100,25 +116,36 @@ static int jpeg_v5_0_0_sw_init(void *handle)
adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET;
adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH);
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0, ARRAY_SIZE(jpeg_reg_list_5_0));
+ if (r)
+ return r;
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->jpeg.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
return 0;
}
/**
* jpeg_v5_0_0_sw_fini - sw fini for JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* JPEG suspend and free up sw allocation
*/
-static int jpeg_v5_0_0_sw_fini(void *handle)
+static int jpeg_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_jpeg_suspend(adev);
if (r)
return r;
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -127,12 +154,12 @@ static int jpeg_v5_0_0_sw_fini(void *handle)
/**
* jpeg_v5_0_0_hw_init - start and test JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
*/
-static int jpeg_v5_0_0_hw_init(void *handle)
+static int jpeg_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec;
int r;
@@ -153,19 +180,19 @@ static int jpeg_v5_0_0_hw_init(void *handle)
/**
* jpeg_v5_0_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the JPEG block, mark ring as not ready any more
*/
-static int jpeg_v5_0_0_hw_fini(void *handle)
+static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
- jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
return 0;
}
@@ -173,20 +200,19 @@ static int jpeg_v5_0_0_hw_fini(void *handle)
/**
* jpeg_v5_0_0_suspend - suspend JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend JPEG block
*/
-static int jpeg_v5_0_0_suspend(void *handle)
+static int jpeg_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = jpeg_v5_0_0_hw_fini(adev);
+ r = jpeg_v5_0_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_jpeg_suspend(adev);
+ r = amdgpu_jpeg_suspend(ip_block->adev);
return r;
}
@@ -194,20 +220,19 @@ static int jpeg_v5_0_0_suspend(void *handle)
/**
* jpeg_v5_0_0_resume - resume JPEG block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init JPEG block
*/
-static int jpeg_v5_0_0_resume(void *handle)
+static int jpeg_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- r = amdgpu_jpeg_resume(adev);
+ r = amdgpu_jpeg_resume(ip_block->adev);
if (r)
return r;
- r = jpeg_v5_0_0_hw_init(adev);
+ r = jpeg_v5_0_0_hw_init(ip_block);
return r;
}
@@ -534,9 +559,9 @@ static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static bool jpeg_v5_0_0_is_idle(void *handle)
+static bool jpeg_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 1;
ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) &
@@ -546,23 +571,23 @@ static bool jpeg_v5_0_0_is_idle(void *handle)
return ret;
}
-static int jpeg_v5_0_0_wait_for_idle(void *handle)
+static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
}
-static int jpeg_v5_0_0_set_clockgating_state(void *handle,
+static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
if (enable) {
- if (!jpeg_v5_0_0_is_idle(handle))
+ if (!jpeg_v5_0_0_is_idle(ip_block))
return -EBUSY;
jpeg_v5_0_0_enable_clock_gating(adev);
} else {
@@ -572,10 +597,10 @@ static int jpeg_v5_0_0_set_clockgating_state(void *handle,
return 0;
}
-static int jpeg_v5_0_0_set_powergating_state(void *handle,
+static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
if (state == adev->jpeg.cur_state)
@@ -607,7 +632,7 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
DRM_DEBUG("IH: JPEG TRAP\n");
switch (entry->src_id) {
- case VCN_4_0__SRCID__JPEG_DECODE:
+ case VCN_5_0__SRCID__JPEG_DECODE:
amdgpu_fence_process(adev->jpeg.inst->ring_dec);
break;
default:
@@ -622,7 +647,6 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev,
static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
.name = "jpeg_v5_0_0",
.early_init = jpeg_v5_0_0_early_init,
- .late_init = NULL,
.sw_init = jpeg_v5_0_0_sw_init,
.sw_fini = jpeg_v5_0_0_sw_fini,
.hw_init = jpeg_v5_0_0_hw_init,
@@ -631,14 +655,10 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
.resume = jpeg_v5_0_0_resume,
.is_idle = jpeg_v5_0_0_is_idle,
.wait_for_idle = jpeg_v5_0_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = jpeg_v5_0_0_set_clockgating_state,
.set_powergating_state = jpeg_v5_0_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
};
static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
new file mode 100644
index 000000000000..3b6f65a25646
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c
@@ -0,0 +1,1093 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state);
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+ VCN_5_0__SRCID__JPEG_DECODE,
+ VCN_5_0__SRCID__JPEG1_DECODE,
+ VCN_5_0__SRCID__JPEG2_DECODE,
+ VCN_5_0__SRCID__JPEG3_DECODE,
+ VCN_5_0__SRCID__JPEG4_DECODE,
+ VCN_5_0__SRCID__JPEG5_DECODE,
+ VCN_5_0__SRCID__JPEG6_DECODE,
+ VCN_5_0__SRCID__JPEG7_DECODE,
+ VCN_5_0__SRCID__JPEG8_DECODE,
+ VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_1[] = {
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR),
+ SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS),
+};
+
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+ if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+ return ((0x40 * pipe) - 0xc80);
+ else
+ return ((0x40 * pipe) - 0x440);
+}
+
+/**
+ * jpeg_v5_0_1_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+
+ if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+ return -ENOENT;
+
+ adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+ jpeg_v5_0_1_set_dec_ring_funcs(adev);
+ jpeg_v5_0_1_set_irq_funcs(adev);
+ jpeg_v5_0_1_set_ras_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ /* JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+ if (r)
+ return r;
+ }
+ /* JPEG DJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ /* JPEG EJPEG POISON EVENT */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_sw_init(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->use_doorbell = true;
+ ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+ if (!amdgpu_sriov_vf(adev)) {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 1 + j + 11 * jpeg_inst;
+ } else {
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 2 + j + 32 * jpeg_inst;
+ }
+ sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
+ if (r)
+ return r;
+
+ adev->jpeg.internal.jpeg_pitch[j] =
+ regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+ adev->jpeg.inst[i].external.jpeg_pitch[j] =
+ SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+ (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+ }
+ }
+
+ r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1));
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
+ r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_suspend(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
+ r = amdgpu_jpeg_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, j, r, jpeg_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = jpeg_v5_0_1_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ jpeg_v5_0_1_dec_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ }
+ return 0;
+ }
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED);
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_inst = GET_INST(JPEG, i);
+ ring = adev->jpeg.inst[i].ring_dec;
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+ adev->jpeg.inst[i].aid_id);
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ if (ring->use_doorbell)
+ WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+ ring->pipe,
+ ring->doorbell_index <<
+ VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+ VCN_JPEG_DB_CTRL__EN_MASK);
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+
+ cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG))
+ amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+
+ return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = jpeg_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ r = amdgpu_jpeg_suspend(adev);
+
+ return r;
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = amdgpu_jpeg_resume(adev);
+ if (r)
+ return r;
+
+ r = jpeg_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+static void jpeg_v5_0_1_init_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+
+ /* disable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+ /* keep the JPEG in static PG mode */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+ ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+ /* MJPEG global tiling registers */
+ WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+}
+
+static void jpeg_v5_0_1_deinit_inst(struct amdgpu_device *adev, int i)
+{
+ int jpeg_inst = GET_INST(JPEG, i);
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable anti hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+ UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+ ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+}
+
+static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 reg, data, mask;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+ /* enable System Interrupt for JRBC */
+ reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN);
+ if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe;
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe);
+ WREG32_P(reg, data, mask);
+ } else {
+ data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12);
+ mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12));
+ WREG32_P(reg, data, mask);
+ }
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_VMID,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset,
+ (0x00000001L | 0x00000002L));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ reg_offset, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ reg_offset, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_RPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_WPTR,
+ reg_offset, 0);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_CNTL,
+ reg_offset, 0x00000002L);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JRBC_RB_SIZE,
+ reg_offset, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR,
+ reg_offset);
+}
+
+static int jpeg_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw, item_offset;
+ uint32_t init_status;
+ int i, j, jpeg_inst;
+
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ end.cmd_header.command_type =
+ MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ jpeg_inst = GET_INST(JPEG, i);
+
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ item_offset = header.total_size;
+
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ table_size = 0;
+
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr));
+ tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC_RB_SIZE);
+ MMSCH_V5_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4);
+
+ if (j < 5) {
+ header.mjpegdec0[j].table_offset = item_offset;
+ header.mjpegdec0[j].init_status = 0;
+ header.mjpegdec0[j].table_size = table_size;
+ } else {
+ header.mjpegdec1[j - 5].table_offset = item_offset;
+ header.mjpegdec1[j - 5].init_status = 0;
+ header.mjpegdec1[j - 5].table_size = table_size;
+ }
+ header.total_size += table_size;
+ item_offset += table_size;
+ }
+
+ MMSCH_V5_0_INSERT_END();
+
+ /* send init table to MMSCH */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ init_status =
+ ((struct mmsch_v5_0_init_header *)(table_loc))->mjpegdec0[i].init_status;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP);
+
+ if (resp != 0)
+ break;
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE &&
+ init_status != MMSCH_VF_ENGINE_STATUS__PASS)
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n",
+ resp, init_status);
+
+ }
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ jpeg_v5_0_1_init_inst(adev, i);
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ ring = &adev->jpeg.inst[i].ring_dec[j];
+ jpeg_v5_0_1_init_jrbc(ring);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v5_0_1_stop(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ jpeg_v5_0_1_deinit_inst(adev, i);
+
+ return 0;
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+
+ return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR,
+ ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0);
+}
+
+/**
+ * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+ regUVD_JRBC_RB_WPTR,
+ (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0),
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool ret = false;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset) &
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK) ==
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0);
+
+ ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i),
+ regUVD_JRBC_STATUS, reg_offset,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+ UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+ }
+ }
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+ int i;
+
+ if (!enable)
+ return 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ if (!jpeg_v5_0_1_is_idle(ip_block))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int ret;
+
+ if (amdgpu_sriov_vf(adev)) {
+ adev->jpeg.cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == adev->jpeg.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = jpeg_v5_0_1_stop(adev);
+ else
+ ret = jpeg_v5_0_1_start(adev);
+
+ if (!ret)
+ adev->jpeg.cur_state = state;
+
+ return ret;
+}
+
+static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int jpeg_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+
+
+static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+ DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n");
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst)
+ if (adev->jpeg.inst[inst].aid_id == i)
+ break;
+
+ if (inst >= adev->jpeg.num_jpeg_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown JPEG instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]);
+ break;
+ case VCN_5_0__SRCID__JPEG1_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]);
+ break;
+ case VCN_5_0__SRCID__JPEG2_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]);
+ break;
+ case VCN_5_0__SRCID__JPEG3_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
+ break;
+ case VCN_5_0__SRCID__JPEG4_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
+ break;
+ case VCN_5_0__SRCID__JPEG5_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
+ break;
+ case VCN_5_0__SRCID__JPEG6_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
+ break;
+ case VCN_5_0__SRCID__JPEG7_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
+ break;
+ case VCN_5_0__SRCID__JPEG8_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]);
+ break;
+ case VCN_5_0__SRCID__JPEG9_DECODE:
+ amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int jpeg_inst = GET_INST(JPEG, ring->me);
+ int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0;
+
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x1F);
+ SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS,
+ reg_offset, 0x1F, 0x1F);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x1F);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_UVD_JMI_CLIENT_STALL,
+ reg_offset, 0x00);
+ WREG32_SOC15_OFFSET(JPEG, jpeg_inst,
+ regUVD_JMI0_JPEG_LMI_DROP,
+ reg_offset, 0x00);
+ WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00);
+}
+
+static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EOPNOTSUPP;
+
+ jpeg_v5_0_1_core_stall_reset(ring);
+ jpeg_v5_0_1_init_jrbc(ring);
+ return amdgpu_ring_test_helper(ring);
+}
+
+static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = {
+ .name = "jpeg_v5_0_1",
+ .early_init = jpeg_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = jpeg_v5_0_1_sw_init,
+ .sw_fini = jpeg_v5_0_1_sw_fini,
+ .hw_init = jpeg_v5_0_1_hw_init,
+ .hw_fini = jpeg_v5_0_1_hw_fini,
+ .suspend = jpeg_v5_0_1_suspend,
+ .resume = jpeg_v5_0_1_resume,
+ .is_idle = jpeg_v5_0_1_is_idle,
+ .wait_for_idle = jpeg_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state,
+ .set_powergating_state = jpeg_v5_0_1_set_powergating_state,
+ .dump_ip_state = amdgpu_jpeg_dump_ip_state,
+ .print_ip_state = amdgpu_jpeg_print_ip_state,
+};
+
+static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr,
+ .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr,
+ .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */
+ 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */
+ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib,
+ .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence,
+ .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush,
+ .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_jpeg_dec_ring_test_ring,
+ .test_ib = amdgpu_jpeg_dec_ring_test_ib,
+ .insert_nop = jpeg_v4_0_3_dec_ring_nop,
+ .insert_start = jpeg_v4_0_3_dec_ring_insert_start,
+ .insert_end = jpeg_v4_0_3_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_jpeg_ring_begin_use,
+ .end_use = amdgpu_jpeg_ring_end_use,
+ .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
+ .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = jpeg_v5_0_1_ring_reset,
+};
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, j, jpeg_inst;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+ for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+ adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs;
+ adev->jpeg.inst[i].ring_dec[j].me = i;
+ adev->jpeg.inst[i].ring_dec[j].pipe = j;
+ }
+ jpeg_inst = GET_INST(JPEG, i);
+ adev->jpeg.inst[i].aid_id =
+ jpeg_inst / adev->jpeg.num_inst_per_aid;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = {
+ .set = jpeg_v5_0_1_set_interrupt_state,
+ .process = jpeg_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_ras_irq_funcs = {
+ .set = jpeg_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_jpeg_process_poison_irq,
+};
+
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i)
+ adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings;
+
+ adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs;
+
+ adev->jpeg.inst->ras_poison_irq.num_types = 1;
+ adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v5_0_1_ras_irq_funcs;
+
+}
+
+const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_JPEG,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &jpeg_v5_0_1_ip_funcs,
+};
+
+static uint32_t jpeg_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V5_0_1_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V5_0_1_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v5_0_1_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+static const struct amdgpu_ras_block_hw_ops jpeg_v5_0_1_ras_hw_ops = {
+ .query_poison_status = jpeg_v5_0_1_query_ras_poison_status,
+};
+
+static int jpeg_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int jpeg_v5_0_1_err_codes[] = {
+ 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+static bool jpeg_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ jpeg_v5_0_1_err_codes,
+ ARRAY_SIZE(jpeg_v5_0_1_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops jpeg_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = jpeg_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = jpeg_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info jpeg_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &jpeg_v5_0_1_aca_bank_ops,
+};
+
+static int jpeg_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->jpeg.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->jpeg.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG,
+ &jpeg_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_jpeg_ras jpeg_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v5_0_1_ras_hw_ops,
+ .ras_late_init = jpeg_v5_0_1_ras_late_init,
+ },
+};
+
+static void jpeg_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->jpeg.ras = &jpeg_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
new file mode 100644
index 000000000000..a7e58d5fb246
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __JPEG_V5_0_1_H__
+#define __JPEG_V5_0_1_H__
+
+extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block;
+
+#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094
+#define regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET 0x1bffe
+
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640
+#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649
+#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a
+#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000
+#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009
+#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a
+#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040
+#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049
+#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a
+#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080
+#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089
+#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a
+#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0
+#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9
+#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca
+#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100
+#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109
+#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a
+#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140
+#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149
+#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a
+#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180
+#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189
+#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a
+#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0
+#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9
+#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca
+#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440
+#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449
+#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a
+#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1
+#define regUVD_JMI0_JPEG_LMI_DROP 0x0663
+#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a
+#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b
+#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1
+#define regJPEG_CORE_RST_CTRL 0x072e
+#define regJPEG_CORE_RST_CTRL_BASE_IDX 1
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+enum amdgpu_jpeg_v5_0_1_sub_block {
+ AMDGPU_JPEG_V5_0_1_JPEG0 = 0,
+ AMDGPU_JPEG_V5_0_1_JPEG1,
+
+ AMDGPU_JPEG_V5_0_1_MAX_SUB_BLOCK,
+};
+
+#endif /* __JPEG_V5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
new file mode 100644
index 000000000000..d6f50b13e2ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
+
+#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+
+static int
+mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_bo_unreserve(bo);
+ bo = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+ return ret;
+}
+
+static int
+mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ uint64_t wptr)
+{
+ struct amdgpu_bo_va_mapping *wptr_mapping;
+ struct amdgpu_vm *wptr_vm;
+ struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
+ int ret;
+
+ wptr_vm = queue->vm;
+ ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+ if (ret)
+ return ret;
+
+ wptr &= AMDGPU_GMC_HOLE_MASK;
+ wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
+ amdgpu_bo_unreserve(wptr_vm->root.bo);
+ if (!wptr_mapping) {
+ DRM_ERROR("Failed to lookup wptr bo\n");
+ return -EINVAL;
+ }
+
+ wptr_obj->obj = wptr_mapping->bo_va->base.bo;
+ if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
+ DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
+ return -EINVAL;
+ }
+
+ ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
+ if (ret) {
+ DRM_ERROR("Failed to map wptr bo to GART\n");
+ return ret;
+ }
+
+ queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
+ return 0;
+}
+
+static int convert_to_mes_priority(int priority)
+{
+ switch (priority) {
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW:
+ default:
+ return AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW:
+ return AMDGPU_MES_PRIORITY_LEVEL_LOW;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH:
+ return AMDGPU_MES_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH:
+ return AMDGPU_MES_PRIORITY_LEVEL_HIGH;
+ }
+}
+
+static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
+ struct mes_add_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+
+ queue_input.process_va_start = 0;
+ queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+
+ /* set process quantum to 10 ms and gang quantum to 1 ms as default */
+ queue_input.process_quantum = 100000;
+ queue_input.gang_quantum = 10000;
+ queue_input.paging = false;
+
+ queue_input.process_context_addr = ctx->gpu_addr;
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+ queue_input.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+ queue_input.gang_global_priority_level = convert_to_mes_priority(queue->priority);
+
+ queue_input.process_id = queue->vm->pasid;
+ queue_input.queue_type = queue->queue_type;
+ queue_input.mqd_addr = queue->mqd.gpu_addr;
+ queue_input.wptr_addr = userq_props->wptr_gpu_addr;
+ queue_input.queue_size = userq_props->queue_size >> 2;
+ queue_input.doorbell_offset = userq_props->doorbell_index;
+ queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
+ queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r) {
+ DRM_ERROR("Failed to map queue in HW, err (%d)\n", r);
+ return r;
+ }
+
+ DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
+ return 0;
+}
+
+static int mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct mes_remove_queue_input queue_input;
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r;
+
+ memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+ queue_input.doorbell_offset = queue->doorbell_index;
+ queue_input.gang_context_addr = ctx->gpu_addr + AMDGPU_USERQ_PROC_CTX_SZ;
+
+ amdgpu_mes_lock(&adev->mes);
+ r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+ amdgpu_mes_unlock(&adev->mes);
+ if (r)
+ DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
+ return r;
+}
+
+static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue,
+ struct drm_amdgpu_userq_in *mqd_user)
+{
+ struct amdgpu_userq_obj *ctx = &queue->fw_obj;
+ int r, size;
+
+ /*
+ * The FW expects at least one page space allocated for
+ * process ctx and gang ctx each. Create an object
+ * for the same.
+ */
+ size = AMDGPU_USERQ_PROC_CTX_SZ + AMDGPU_USERQ_GANG_CTX_SZ;
+ r = amdgpu_userq_create_object(uq_mgr, ctx, size);
+ if (r) {
+ DRM_ERROR("Failed to allocate ctx space bo for userqueue, err:%d\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+ struct drm_amdgpu_userq_in *args_in,
+ struct amdgpu_usermode_queue *queue)
+{
+ struct amdgpu_device *adev = uq_mgr->adev;
+ struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
+ struct drm_amdgpu_userq_in *mqd_user = args_in;
+ struct amdgpu_mqd_prop *userq_props;
+ int r;
+
+ /* Structure to initialize MQD for userqueue using generic MQD init function */
+ userq_props = kzalloc(sizeof(struct amdgpu_mqd_prop), GFP_KERNEL);
+ if (!userq_props) {
+ DRM_ERROR("Failed to allocate memory for userq_props\n");
+ return -ENOMEM;
+ }
+
+ if (!mqd_user->wptr_va || !mqd_user->rptr_va ||
+ !mqd_user->queue_va || mqd_user->queue_size == 0) {
+ DRM_ERROR("Invalid MQD parameters for userqueue\n");
+ r = -EINVAL;
+ goto free_props;
+ }
+
+ r = amdgpu_userq_create_object(uq_mgr, &queue->mqd, mqd_hw_default->mqd_size);
+ if (r) {
+ DRM_ERROR("Failed to create MQD object for userqueue\n");
+ goto free_props;
+ }
+
+ /* Initialize the MQD BO with user given values */
+ userq_props->wptr_gpu_addr = mqd_user->wptr_va;
+ userq_props->rptr_gpu_addr = mqd_user->rptr_va;
+ userq_props->queue_size = mqd_user->queue_size;
+ userq_props->hqd_base_gpu_addr = mqd_user->queue_va;
+ userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
+ userq_props->use_doorbell = true;
+ userq_props->doorbell_index = queue->doorbell_index;
+ userq_props->fence_address = queue->fence_drv->gpu_addr;
+
+ if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
+ struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
+
+ if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
+ DRM_ERROR("Invalid compute IP MQD size\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ compute_mqd = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(compute_mqd)) {
+ DRM_ERROR("Failed to read user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+
+ userq_props->eop_gpu_addr = compute_mqd->eop_va;
+ userq_props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
+ userq_props->hqd_active = false;
+ userq_props->tmz_queue =
+ mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+ kfree(compute_mqd);
+ } else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
+ struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+
+ if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
+ DRM_ERROR("Invalid GFX MQD\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(mqd_gfx_v11)) {
+ DRM_ERROR("Failed to read user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+
+ userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
+ userq_props->csa_addr = mqd_gfx_v11->csa_va;
+ userq_props->tmz_queue =
+ mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+ kfree(mqd_gfx_v11);
+ } else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+ struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
+
+ if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
+ DRM_ERROR("Invalid SDMA MQD\n");
+ r = -EINVAL;
+ goto free_mqd;
+ }
+
+ mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+ if (IS_ERR(mqd_sdma_v11)) {
+ DRM_ERROR("Failed to read sdma user MQD\n");
+ r = -ENOMEM;
+ goto free_mqd;
+ }
+
+ userq_props->csa_addr = mqd_sdma_v11->csa_va;
+ kfree(mqd_sdma_v11);
+ }
+
+ queue->userq_prop = userq_props;
+
+ r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props);
+ if (r) {
+ DRM_ERROR("Failed to initialize MQD for userqueue\n");
+ goto free_mqd;
+ }
+
+ /* Create BO for FW operations */
+ r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
+ if (r) {
+ DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
+ goto free_mqd;
+ }
+
+ /* FW expects WPTR BOs to be mapped into GART */
+ r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+ if (r) {
+ DRM_ERROR("Failed to create WPTR mapping\n");
+ goto free_ctx;
+ }
+
+ return 0;
+
+free_ctx:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+
+free_mqd:
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+
+free_props:
+ kfree(userq_props);
+
+ return r;
+}
+
+static void
+mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
+ struct amdgpu_usermode_queue *queue)
+{
+ amdgpu_userq_destroy_object(uq_mgr, &queue->fw_obj);
+ kfree(queue->userq_prop);
+ amdgpu_userq_destroy_object(uq_mgr, &queue->mqd);
+}
+
+const struct amdgpu_userq_funcs userq_mes_funcs = {
+ .mqd_create = mes_userq_mqd_create,
+ .mqd_destroy = mes_userq_mqd_destroy,
+ .unmap = mes_userq_unmap,
+ .map = mes_userq_map,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
new file mode 100644
index 000000000000..090ae8897770
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef MES_USERQ_H
+#define MES_USERQ_H
+#include "amdgpu_userq.h"
+
+extern const struct amdgpu_userq_funcs userq_mes_funcs;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 231a3d490ea8..28eb846280dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -54,14 +54,17 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin");
-static int mes_v11_0_hw_init(void *handle);
-static int mes_v11_0_hw_fini(void *handle);
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
#define MES_EOP_SIZE 2048
#define GFX_MES_DRAM_SIZE 0x80000
+#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE)
static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring)
{
@@ -284,6 +287,23 @@ static int convert_to_mes_queue_type(int queue_type)
return -1;
}
+static int convert_to_mes_priority_level(int priority_level)
+{
+ switch (priority_level) {
+ case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+ return AMD_PRIORITY_LEVEL_LOW;
+ case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+ default:
+ return AMD_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+ return AMD_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+ return AMD_PRIORITY_LEVEL_HIGH;
+ case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+ return AMD_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
struct mes_add_queue_input *input)
{
@@ -307,9 +327,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
+ convert_to_mes_priority_level(input->inprocess_gang_priority);
mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
+ convert_to_mes_priority_level(input->gang_global_priority_level);
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
@@ -366,7 +386,7 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
uint32_t queue_id, uint32_t vmid)
{
struct amdgpu_device *adev = mes->adev;
- uint32_t value;
+ uint32_t value, reg;
int i, r = 0;
amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
@@ -424,37 +444,37 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
}
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
}
amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
return r;
}
-static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
- struct mes_reset_queue_input *input)
-{
- if (input->use_mmio)
- return mes_v11_0_reset_queue_mmio(mes, input->queue_type,
- input->me_id, input->pipe_id,
- input->queue_id, input->vmid);
-
- union MESAPI__RESET mes_reset_queue_pkt;
-
- memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
-
- mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
- mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
-
- mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr;
- /*mes_reset_queue_pkt.reset_queue_only = 1;*/
-
- return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
- offsetof(union MESAPI__REMOVE_QUEUE, api_status));
-}
-
static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
struct mes_map_legacy_queue_input *input)
{
@@ -619,6 +639,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
+ dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n");
+ return -EINVAL;
+ }
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
@@ -654,7 +686,8 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes->compute_hqd_mask[i];
for (i = 0; i < MAX_GFX_PIPES; i++)
- mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i];
+ mes_set_hw_res_pkt.gfx_hqd_mask[i] =
+ mes->gfx_hqd_mask[i];
for (i = 0; i < MAX_SDMA_PIPES; i++)
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
@@ -683,6 +716,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes->event_log_gpu_addr;
}
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
@@ -690,9 +726,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
{
- int size = 128 * PAGE_SIZE;
- int ret = 0;
- struct amdgpu_device *adev = mes->adev;
union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;
memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
@@ -701,25 +734,20 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
- ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &mes->resource_1,
- &mes->resource_1_gpu_addr,
- &mes->resource_1_addr);
- if (ret) {
- dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret);
- return ret;
+ mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0];
+ if (amdgpu_sriov_is_mes_info_enable(mes->adev)) {
+ mes_set_hw_res_pkt.mes_info_ctx_mc_addr =
+ mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE;
+ mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
}
- mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr;
- mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
}
-static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes,
- struct mes_reset_legacy_queue_input *input)
+static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
{
union MESAPI__RESET mes_reset_queue_pkt;
@@ -737,7 +765,7 @@ static int mes_v11_0_reset_legacy_queue(struct amdgpu_mes *mes,
mes_reset_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
- if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
+ if (input->legacy_gfx) {
mes_reset_queue_pkt.reset_legacy_gfx = 1;
mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
mes_reset_queue_pkt.queue_id_lp = input->queue_id;
@@ -763,12 +791,11 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
.misc_op = mes_v11_0_misc_op,
- .reset_legacy_queue = mes_v11_0_reset_legacy_queue,
.reset_hw_queue = mes_v11_0_reset_hw_queue,
};
static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -803,7 +830,7 @@ static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev,
}
static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -844,7 +871,7 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
}
static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
&adev->mes.data_fw_gpu_addr[pipe],
@@ -859,6 +886,10 @@ static void mes_v11_0_get_fw_version(struct amdgpu_device *adev)
{
int pipe;
+ /* return early if we have already fetched these */
+ if (adev->mes.sched_version && adev->mes.kiq_version)
+ return;
+
/* get MES scheduler/KIQ versions */
mutex_lock(&adev->srbm_mutex);
@@ -883,6 +914,16 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
uint32_t pipe, data = 0;
if (enable) {
+ if (amdgpu_mes_log_enable) {
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+
data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
data = REG_SET_FIELD(data, CP_MES_CNTL,
@@ -932,7 +973,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
/* This function is for backdoor MES firmware */
static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe, bool prime_icache)
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
{
int r;
uint32_t data;
@@ -1004,7 +1045,7 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
}
static int mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
u32 *eop;
@@ -1215,7 +1256,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev)
}
static int mes_v11_0_queue_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
struct amdgpu_ring *ring;
int r;
@@ -1298,7 +1339,7 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev)
}
static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r, mqd_size = sizeof(struct v11_compute_mqd);
struct amdgpu_ring *ring;
@@ -1336,16 +1377,16 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
-static int mes_v11_0_sw_init(void *handle)
+static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int pipe, r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int pipe, r, bo_size;
adev->mes.funcs = &mes_v11_0_funcs;
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
- adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE;
+ adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
r = amdgpu_mes_init(adev);
if (r)
@@ -1374,14 +1415,34 @@ static int mes_v11_0_sw_init(void *handle)
if (r)
return r;
+ bo_size = AMDGPU_GPU_PAGE_SIZE;
+ if (amdgpu_sriov_is_mes_info_enable(adev))
+ bo_size += MES11_HW_RESOURCE_1_SIZE;
+
+ /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
+ r = amdgpu_bo_create_kernel(adev,
+ bo_size,
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[0],
+ &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
+ return r;
+ }
+
return 0;
}
-static int mes_v11_0_sw_fini(void *handle)
+static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe;
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
+ &adev->mes.resource_1_addr[0]);
+
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
kfree(adev->mes.mqd_backup[pipe]);
@@ -1455,9 +1516,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
@@ -1473,6 +1532,7 @@ static void mes_v11_0_kiq_clear(struct amdgpu_device *adev)
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1496,6 +1556,12 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
mes_v11_0_kiq_setting(&adev->gfx.kiq[0].ring);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
r = mes_v11_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
if (r)
goto failure;
@@ -1506,7 +1572,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
adev->mes.enable_legacy_queue_map = false;
if (adev->mes.enable_legacy_queue_map) {
- r = mes_v11_0_hw_init(adev);
+ r = mes_v11_0_hw_init(ip_block);
if (r)
goto failure;
}
@@ -1514,7 +1580,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
return r;
failure:
- mes_v11_0_hw_fini(adev);
+ mes_v11_0_hw_fini(ip_block);
return r;
}
@@ -1535,10 +1601,10 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
return 0;
}
-static int mes_v11_0_hw_init(void *handle)
+static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->mes.ring[0].sched.ready)
goto out;
@@ -1564,7 +1630,7 @@ static int mes_v11_0_hw_init(void *handle)
if (r)
goto failure;
- if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x50) {
r = mes_v11_0_set_hw_resources_1(&adev->mes);
if (r) {
DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
@@ -1578,6 +1644,10 @@ static int mes_v11_0_hw_init(void *handle)
goto failure;
}
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
out:
/*
* Disable KIQ ring usage from the driver once MES is enabled.
@@ -1590,47 +1660,28 @@ out:
return 0;
failure:
- mes_v11_0_hw_fini(adev);
+ mes_v11_0_hw_fini(ip_block);
return r;
}
-static int mes_v11_0_hw_fini(void *handle)
+static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (amdgpu_sriov_is_mes_info_enable(adev)) {
- amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr,
- &adev->mes.resource_1_addr);
- }
return 0;
}
-static int mes_v11_0_suspend(void *handle)
+static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_mes_suspend(adev);
- if (r)
- return r;
-
- return mes_v11_0_hw_fini(adev);
+ return mes_v11_0_hw_fini(ip_block);
}
-static int mes_v11_0_resume(void *handle)
+static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = mes_v11_0_hw_init(adev);
- if (r)
- return r;
-
- return amdgpu_mes_resume(adev);
+ return mes_v11_0_hw_init(ip_block);
}
-static int mes_v11_0_early_init(void *handle)
+static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
@@ -1644,30 +1695,16 @@ static int mes_v11_0_early_init(void *handle)
return 0;
}
-static int mes_v11_0_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- /* it's only intended for use in mes_self_test case, not for s0ix and reset */
- if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend &&
- (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3)))
- amdgpu_mes_self_test(adev);
-
- return 0;
-}
-
static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
.name = "mes_v11_0",
.early_init = mes_v11_0_early_init,
- .late_init = mes_v11_0_late_init,
+ .late_init = NULL,
.sw_init = mes_v11_0_sw_init,
.sw_fini = mes_v11_0_sw_fini,
.hw_init = mes_v11_0_hw_init,
.hw_fini = mes_v11_0_hw_fini,
.suspend = mes_v11_0_suspend,
.resume = mes_v11_0_resume,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index a37a6801c9ea..6b222630f3fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include "amdgpu.h"
+#include "gfx_v12_0.h"
#include "soc15_common.h"
#include "soc21.h"
#include "gc/gc_12_0_0_offset.h"
@@ -39,8 +40,8 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_12_0_1_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_12_0_1_uni_mes.bin");
-static int mes_v12_0_hw_init(void *handle);
-static int mes_v12_0_hw_fini(void *handle);
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block);
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block);
static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev);
@@ -273,6 +274,23 @@ static int convert_to_mes_queue_type(int queue_type)
return -1;
}
+static int convert_to_mes_priority_level(int priority_level)
+{
+ switch (priority_level) {
+ case AMDGPU_MES_PRIORITY_LEVEL_LOW:
+ return AMD_PRIORITY_LEVEL_LOW;
+ case AMDGPU_MES_PRIORITY_LEVEL_NORMAL:
+ default:
+ return AMD_PRIORITY_LEVEL_NORMAL;
+ case AMDGPU_MES_PRIORITY_LEVEL_MEDIUM:
+ return AMD_PRIORITY_LEVEL_MEDIUM;
+ case AMDGPU_MES_PRIORITY_LEVEL_HIGH:
+ return AMD_PRIORITY_LEVEL_HIGH;
+ case AMDGPU_MES_PRIORITY_LEVEL_REALTIME:
+ return AMD_PRIORITY_LEVEL_REALTIME;
+ }
+}
+
static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
struct mes_add_queue_input *input)
{
@@ -296,9 +314,9 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gang_quantum = input->gang_quantum;
mes_add_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_add_queue_pkt.inprocess_gang_priority =
- input->inprocess_gang_priority;
+ convert_to_mes_priority_level(input->inprocess_gang_priority);
mes_add_queue_pkt.gang_global_priority_level =
- input->gang_global_priority_level;
+ convert_to_mes_priority_level(input->gang_global_priority_level);
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
@@ -350,30 +368,130 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
-static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
- struct mes_reset_queue_input *input)
+int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev,
+ bool req)
{
- union MESAPI__RESET mes_reset_queue_pkt;
- int pipe;
+ u32 i, tmp, val;
- memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* Request with MeId=2, PipeId=0 */
+ tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
+ WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
- mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
- mes_reset_queue_pkt.header.opcode = MES_SCH_API_RESET;
- mes_reset_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
+ if (req) {
+ if (val == tmp)
+ break;
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
+ REQUEST, 1);
- mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
- mes_reset_queue_pkt.gang_context_addr = input->gang_context_addr;
- /*mes_reset_queue_pkt.reset_queue_only = 1;*/
+ /* unlocked or locked by firmware */
+ if (val != tmp)
+ break;
+ }
+ udelay(1);
+ }
- if (mes->adev->enable_uni_mes)
- pipe = AMDGPU_MES_KIQ_PIPE;
- else
- pipe = AMDGPU_MES_SCHED_PIPE;
+ if (i >= adev->usec_timeout)
+ return -EINVAL;
- return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
- &mes_reset_queue_pkt, sizeof(mes_reset_queue_pkt),
- offsetof(union MESAPI__REMOVE_QUEUE, api_status));
+ return 0;
+}
+
+static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type,
+ uint32_t me_id, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t vmid)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t value, reg;
+ int i, r = 0;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ if (queue_type == AMDGPU_RING_TYPE_GFX) {
+ dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n",
+ me_id, pipe_id, queue_id, vmid);
+
+ mutex_lock(&adev->gfx.reset_sem_mutex);
+ gfx_v12_0_request_gfx_index_mutex(adev, true);
+ /* all se allow writes */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX,
+ (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT));
+ value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ if (pipe_id == 0)
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id);
+ else
+ value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id);
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, value);
+ gfx_v12_0_request_gfx_index_mutex(adev, false);
+ mutex_unlock(&adev->gfx.reset_sem_mutex);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
+ dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+ /* wait till dequeue take effects */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on hqd deactivate\n");
+ r = -ETIMEDOUT;
+ }
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else if (queue_type == AMDGPU_RING_TYPE_SDMA) {
+ dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n",
+ me_id, pipe_id, queue_id);
+ switch (me_id) {
+ case 1:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ);
+ break;
+ case 0:
+ default:
+ reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ);
+ break;
+ }
+
+ value = 1 << queue_id;
+ WREG32(reg, value);
+ /* wait for queue reset done */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32(reg) & value))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "failed to wait on sdma queue reset done\n");
+ r = -ETIMEDOUT;
+ }
+ }
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
}
static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes,
@@ -531,6 +649,14 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes,
sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en;
break;
+ case MES_MISC_OP_CHANGE_CONFIG:
+ misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
+ misc_pkt.change_config.opcode =
+ MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS;
+ misc_pkt.change_config.option.bits.limit_single_process =
+ input->change_config.option.limit_single_process;
+ break;
+
default:
DRM_ERROR("unsupported misc op (%d) \n", input->op);
return -EINVAL;
@@ -550,7 +676,9 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
mes_set_hw_res_1_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100;
+ mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
+ mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
+ mes->resource_1_gpu_addr[pipe];
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
@@ -621,9 +749,13 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe)
if (amdgpu_mes_log_enable) {
mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr +
+ pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
}
+ if (adev->enforce_isolation[0] == AMDGPU_ENFORCE_ISOLATION_ENABLE)
+ mes_set_hw_res_pkt.limit_single_process = 1;
+
return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
@@ -704,12 +836,17 @@ static void mes_v12_0_enable_unmapped_doorbell_handling(
WREG32_SOC15(GC, 0, regCP_UNMAPPED_DOORBELL, data);
}
-static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
- struct mes_reset_legacy_queue_input *input)
+static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes,
+ struct mes_reset_queue_input *input)
{
union MESAPI__RESET mes_reset_queue_pkt;
int pipe;
+ if (input->use_mmio)
+ return mes_v12_0_reset_queue_mmio(mes, input->queue_type,
+ input->me_id, input->pipe_id,
+ input->queue_id, input->vmid);
+
memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt));
mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
@@ -719,7 +856,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
mes_reset_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
- if (mes_reset_queue_pkt.queue_type == MES_QUEUE_TYPE_GFX) {
+ if (input->legacy_gfx) {
mes_reset_queue_pkt.reset_legacy_gfx = 1;
mes_reset_queue_pkt.pipe_id_lp = input->pipe_id;
mes_reset_queue_pkt.queue_id_lp = input->queue_id;
@@ -732,7 +869,7 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes,
mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
}
- if (mes->adev->enable_uni_mes)
+ if (input->is_kq)
pipe = AMDGPU_MES_KIQ_PIPE;
else
pipe = AMDGPU_MES_SCHED_PIPE;
@@ -750,12 +887,11 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = {
.suspend_gang = mes_v12_0_suspend_gang,
.resume_gang = mes_v12_0_resume_gang,
.misc_op = mes_v12_0_misc_op,
- .reset_legacy_queue = mes_v12_0_reset_legacy_queue,
.reset_hw_queue = mes_v12_0_reset_hw_queue,
};
static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -789,7 +925,7 @@ static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev,
}
static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
const struct mes_firmware_header_v1_0 *mes_hdr;
@@ -823,7 +959,7 @@ static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev,
}
static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe],
&adev->mes.data_fw_gpu_addr[pipe],
@@ -840,29 +976,50 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable)
uint32_t pipe, data = 0;
if (enable) {
- data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
- WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
-
mutex_lock(&adev->srbm_mutex);
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
soc21_grbm_select(adev, 3, pipe, 0, 0);
+ if (amdgpu_mes_log_enable) {
+ u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;
+ /* In case uni mes is not enabled, only program for pipe 0 */
+ if (adev->mes.event_log_size >= (pipe + 1) * log_size) {
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO,
+ lower_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI,
+ upper_32_bits(adev->mes.event_log_gpu_addr +
+ pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE));
+ dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n",
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI),
+ RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO));
+ }
+ }
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
+ if (pipe == 0)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ else
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
ucode_addr = adev->mes.uc_start_addr[pipe] >> 2;
WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START,
lower_32_bits(ucode_addr));
WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI,
upper_32_bits(ucode_addr));
+
+ /* unhalt MES and activate one pipe each loop */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ if (pipe)
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
+ dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data);
+
+ WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
+
}
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
- /* unhalt MES and activate pipe0 */
- data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
- data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1);
- WREG32_SOC15(GC, 0, regCP_MES_CNTL, data);
-
if (amdgpu_emu_mode)
msleep(100);
else if (adev->enable_uni_mes)
@@ -908,7 +1065,7 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev)
/* This function is for backdoor MES firmware */
static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe, bool prime_icache)
+ enum amdgpu_mes_pipe pipe, bool prime_icache)
{
int r;
uint32_t data;
@@ -972,7 +1129,7 @@ static int mes_v12_0_load_microcode(struct amdgpu_device *adev,
}
static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r;
u32 *eop;
@@ -1193,7 +1350,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev)
}
static int mes_v12_0_queue_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
struct amdgpu_ring *ring;
int r;
@@ -1225,17 +1382,20 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev,
mes_v12_0_queue_init_register(ring);
}
- /* get MES scheduler/KIQ versions */
- mutex_lock(&adev->srbm_mutex);
- soc21_grbm_select(adev, 3, pipe, 0, 0);
+ if (((pipe == AMDGPU_MES_SCHED_PIPE) && !adev->mes.sched_version) ||
+ ((pipe == AMDGPU_MES_KIQ_PIPE) && !adev->mes.kiq_version)) {
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
- if (pipe == AMDGPU_MES_SCHED_PIPE)
- adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
- else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
- adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
- soc21_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ }
return 0;
}
@@ -1293,7 +1453,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev)
}
static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum amdgpu_mes_pipe pipe)
{
int r, mqd_size = sizeof(struct v12_compute_mqd);
struct amdgpu_ring *ring;
@@ -1326,9 +1486,9 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
-static int mes_v12_0_sw_init(void *handle)
+static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
adev->mes.funcs = &mes_v12_0_funcs;
@@ -1336,8 +1496,9 @@ static int mes_v12_0_sw_init(void *handle)
adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini;
adev->mes.enable_legacy_queue_map = true;
- adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE;
-
+ adev->mes.event_log_size = adev->enable_uni_mes ?
+ (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) :
+ (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE);
r = amdgpu_mes_init(adev);
if (r)
return r;
@@ -1351,23 +1512,38 @@ static int mes_v12_0_sw_init(void *handle)
if (r)
return r;
- if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
+ if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) {
r = mes_v12_0_kiq_ring_init(adev);
- else
+ }
+ else {
r = mes_v12_0_ring_init(adev, pipe);
- if (r)
- return r;
+ if (r)
+ return r;
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
+ return r;
+ }
+ }
}
return 0;
}
-static int mes_v12_0_sw_fini(void *handle)
+static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe;
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
+ amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
+ &adev->mes.resource_1_gpu_addr[pipe],
+ &adev->mes.resource_1_addr[pipe]);
+
kfree(adev->mes.mqd_backup[pipe]);
amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
@@ -1444,14 +1620,13 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
- tmp |= 0x80;
- WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (adev->enable_uni_mes)
mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]);
@@ -1479,6 +1654,12 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
mes_v12_0_enable(adev, true);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_MES);
+ if (unlikely(!ip_block)) {
+ dev_err(adev->dev, "Failed to get MES handle\n");
+ return -EINVAL;
+ }
+
r = mes_v12_0_queue_init(adev, AMDGPU_MES_KIQ_PIPE);
if (r)
goto failure;
@@ -1492,7 +1673,7 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
}
if (adev->mes.enable_legacy_queue_map) {
- r = mes_v12_0_hw_init(adev);
+ r = mes_v12_0_hw_init(ip_block);
if (r)
goto failure;
}
@@ -1500,7 +1681,7 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev)
return r;
failure:
- mes_v12_0_hw_fini(adev);
+ mes_v12_0_hw_fini(ip_block);
return r;
}
@@ -1522,10 +1703,10 @@ static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev)
return 0;
}
-static int mes_v12_0_hw_init(void *handle)
+static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->mes.ring[0].sched.ready)
goto out;
@@ -1561,7 +1742,7 @@ static int mes_v12_0_hw_init(void *handle)
if (r)
goto failure;
- if (adev->enable_uni_mes)
+ if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x4b)
mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
mes_v12_0_init_aggregated_doorbell(&adev->mes);
@@ -1572,6 +1753,10 @@ static int mes_v12_0_hw_init(void *handle)
goto failure;
}
+ r = amdgpu_mes_update_enforce_isolation(adev);
+ if (r)
+ goto failure;
+
out:
/*
* Disable KIQ ring usage from the driver once MES is enabled.
@@ -1584,42 +1769,28 @@ out:
return 0;
failure:
- mes_v12_0_hw_fini(adev);
+ mes_v12_0_hw_fini(ip_block);
return r;
}
-static int mes_v12_0_hw_fini(void *handle)
+static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int mes_v12_0_suspend(void *handle)
+static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_mes_suspend(adev);
- if (r)
- return r;
-
- return mes_v12_0_hw_fini(adev);
+ return mes_v12_0_hw_fini(ip_block);
}
-static int mes_v12_0_resume(void *handle)
+static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = mes_v12_0_hw_init(adev);
- if (r)
- return r;
-
- return amdgpu_mes_resume(adev);
+ return mes_v12_0_hw_init(ip_block);
}
-static int mes_v12_0_early_init(void *handle)
+static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int pipe, r;
for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
@@ -1631,21 +1802,10 @@ static int mes_v12_0_early_init(void *handle)
return 0;
}
-static int mes_v12_0_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- /* it's only intended for use in mes_self_test case, not for s0ix and reset */
- if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend)
- amdgpu_mes_self_test(adev);
-
- return 0;
-}
-
static const struct amd_ip_funcs mes_v12_0_ip_funcs = {
.name = "mes_v12_0",
.early_init = mes_v12_0_early_init,
- .late_init = mes_v12_0_late_init,
+ .late_init = NULL,
.sw_init = mes_v12_0_sw_init,
.sw_fini = mes_v12_0_sw_fini,
.hw_init = mes_v12_0_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index e3ddd22aa172..243eabda0607 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -229,6 +229,52 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
0);
}
+static void mmhub_v1_0_init_saw(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+ uint32_t tmp;
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(pt_base >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+
+ /* VM_9_X_REGISTER_VM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 */
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+
+ /* Program SAW CONTEXT0 CNTL */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL);
+ tmp |= 1 << CONTEXT0_CNTL_ENABLE_OFFSET;
+ tmp &= ~(3 << CONTEXT0_CNTL_PAGE_TABLE_DEPTH_OFFSET);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXT0_CNTL, tmp);
+
+ /* Disable all Contexts except Context0 */
+ tmp = 0xfffe;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CONTEXTS_DISABLE, tmp);
+
+ /* Program SAW CNTL4 */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4);
+ tmp |= 1 << VMC_TAP_PDE_REQUEST_SNOOP_OFFSET;
+ tmp |= 1 << VMC_TAP_PTE_REQUEST_SNOOP_OFFSET;
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_SAW_CNTL4, tmp);
+}
+
static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
@@ -283,6 +329,9 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
+
+ if (amdgpu_ip_version(adev, ISP_HWIP, 0))
+ mmhub_v1_0_init_saw(adev);
}
static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
@@ -307,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB)
amdgpu_dpm_set_powergating_by_smu(adev,
AMD_IP_BLOCK_TYPE_GMC,
- enable);
+ enable, 0);
}
static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 9689e2b5d4e5..2adee2b94c37 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -172,6 +172,30 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp);
}
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i;
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ for (i = 0; i < 5; i++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance, tmp);
+ }
+
+}
+
static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
@@ -337,6 +361,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev)
mmhub_v1_7_init_system_aperture_regs(adev);
mmhub_v1_7_init_tlb_regs(adev);
mmhub_v1_7_init_cache_regs(adev);
+ mmhub_v1_7_init_snoop_override_regs(adev);
mmhub_v1_7_enable_system_domain(adev);
mmhub_v1_7_disable_identity_aperture(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index b01bb759d0f4..76167fadb292 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -30,10 +30,10 @@
#include "soc15_common.h"
#include "soc15.h"
#include "amdgpu_ras.h"
+#include "amdgpu_psp.h"
#define regVM_L2_CNTL3_DEFAULT 0x80100007
#define regVM_L2_CNTL4_DEFAULT 0x000000c1
-#define mmSMNAID_AID0_MCA_SMU 0x03b30400
static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev)
{
@@ -193,10 +193,8 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
uint32_t tmp, inst_mask;
int i;
- /* Setup TLB control */
- inst_mask = adev->aid_mask;
- for_each_inst(i, inst_mask) {
- tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
1);
@@ -210,7 +208,55 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev)
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
- WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC);/* XXX for emulation. */
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
+
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp, inst_mask;
+ int i, j;
+ uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ for (j = 0; j < 5; j++) { /* DAGB instances */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, i,
+ regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp);
+ }
}
}
@@ -419,6 +465,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
mmhub_v1_8_init_system_aperture_regs(adev);
mmhub_v1_8_init_tlb_regs(adev);
mmhub_v1_8_init_cache_regs(adev);
+ mmhub_v1_8_init_snoop_override_regs(adev);
mmhub_v1_8_enable_system_domain(adev);
mmhub_v1_8_disable_identity_aperture(adev);
@@ -428,6 +475,30 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev)
return 0;
}
+static void mmhub_v1_8_disable_l1_tlb(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i, inst_mask;
+
+ if (amdgpu_sriov_reg_indirect_l1_tlb_cntl(adev)) {
+ tmp = RREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ psp_reg_program_no_ring(&adev->psp, tmp, PSP_REG_MMHUB_L1_TLB_CNTL);
+ } else {
+ inst_mask = adev->aid_mask;
+ for_each_inst(i, inst_mask) {
+ tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL, tmp);
+ }
+ }
+}
+
static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub;
@@ -441,15 +512,6 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
-
- /* Setup TLB control */
- tmp = RREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB,
- 0);
- tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL,
- ENABLE_ADVANCED_DRIVER_MODEL, 0);
- WREG32_SOC15(MMHUB, j, regMC_VM_MX_L1_TLB_CNTL, tmp);
-
if (!amdgpu_sriov_vf(adev)) {
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, j, regVM_L2_CNTL);
@@ -459,6 +521,8 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, j, regVM_L2_CNTL3, 0);
}
}
+
+ mmhub_v1_8_disable_l1_tlb(adev);
}
/**
@@ -720,11 +784,13 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
index b4ce3375d3fd..bc3d6c2fc87a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c
@@ -103,6 +103,7 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev,
switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
+ case IP_VERSION(3, 3, 2):
mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ?
mmhub_client_ids_v3_3[cid][rw] :
cid == 0x140 ? "UMSCH" : NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
index 0fbc3be81f14..f2ab5001b492 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c
@@ -108,7 +108,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
- switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw];
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index ff1b58e44689..fe0710b55c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -198,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
}
+/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */
+static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid)
+{
+ uint32_t tmp;
+ int i;
+ uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE -
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE;
+ uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET;
+
+ for (i = 0; i < 5 - (2 * hubid); i++) {
+ /* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance);
+ tmp |= (1 << 15); /* SDMA client is BIT15 */
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE,
+ huboffset + i * distance, tmp);
+
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance);
+ tmp |= (1 << 15);
+ WREG32_SOC15_OFFSET(MMHUB, 0,
+ mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE,
+ huboffset + i * distance, tmp);
+ }
+
+}
+
static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
{
uint32_t tmp;
@@ -392,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_init_cache_regs(adev, i);
+ mmhub_v9_4_init_snoop_override_regs(adev, i);
mmhub_v9_4_enable_system_domain(adev, i);
if (!amdgpu_sriov_vf(adev))
mmhub_v9_4_disable_identity_aperture(adev, i);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
new file mode 100644
index 000000000000..6f749814929f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v5_0.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MMSCH_V5_0_H__
+#define __MMSCH_V5_0_H__
+
+#include "amdgpu_vcn.h"
+
+#define MMSCH_VERSION_MAJOR 5
+#define MMSCH_VERSION_MINOR 0
+#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR)
+
+#define RB_ENABLED (1 << 0)
+#define RB4_ENABLED (1 << 1)
+
+#define MMSCH_VF_ENGINE_STATUS__PASS 0x1
+
+#define MMSCH_VF_MAILBOX_RESP__OK 0x1
+#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2
+#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3
+#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4
+#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5
+
+enum mmsch_v5_0_command_type {
+ MMSCH_COMMAND__DIRECT_REG_WRITE = 0,
+ MMSCH_COMMAND__DIRECT_REG_POLLING = 2,
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3,
+ MMSCH_COMMAND__INDIRECT_REG_WRITE = 8,
+ MMSCH_COMMAND__END = 0xf
+};
+
+struct mmsch_v5_0_table_info {
+ uint32_t init_status;
+ uint32_t table_offset;
+ uint32_t table_size;
+};
+
+struct mmsch_v5_0_init_header {
+ uint32_t version;
+ uint32_t total_size;
+ struct mmsch_v5_0_table_info vcn0;
+ struct mmsch_v5_0_table_info mjpegdec0[5];
+ struct mmsch_v5_0_table_info mjpegdec1[5];
+};
+
+struct mmsch_v5_0_cmd_direct_reg_header {
+ uint32_t reg_offset : 28;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_indirect_reg_header {
+ uint32_t reg_offset : 20;
+ uint32_t reg_idx_space : 8;
+ uint32_t command_type : 4;
+};
+
+struct mmsch_v5_0_cmd_direct_write {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+struct mmsch_v5_0_cmd_direct_read_modify_write {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t write_data;
+ uint32_t mask_value;
+};
+
+struct mmsch_v5_0_cmd_direct_polling {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+ uint32_t mask_value;
+ uint32_t wait_value;
+};
+
+struct mmsch_v5_0_cmd_end {
+ struct mmsch_v5_0_cmd_direct_reg_header cmd_header;
+};
+
+struct mmsch_v5_0_cmd_indirect_write {
+ struct mmsch_v5_0_cmd_indirect_reg_header cmd_header;
+ uint32_t reg_value;
+};
+
+#define MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_read_modify_write); \
+ size_dw = size / 4; \
+ direct_rd_mod_wt.cmd_header.reg_offset = reg; \
+ direct_rd_mod_wt.mask_value = mask; \
+ direct_rd_mod_wt.write_data = data; \
+ memcpy((void *)table_loc, &direct_rd_mod_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_DIRECT_WT(reg, value) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_write); \
+ size_dw = size / 4; \
+ direct_wt.cmd_header.reg_offset = reg; \
+ direct_wt.reg_value = value; \
+ memcpy((void *)table_loc, &direct_wt, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_DIRECT_POLL(reg, mask, wait) { \
+ size = sizeof(struct mmsch_v5_0_cmd_direct_polling); \
+ size_dw = size / 4; \
+ direct_poll.cmd_header.reg_offset = reg; \
+ direct_poll.mask_value = mask; \
+ direct_poll.wait_value = wait; \
+ memcpy((void *)table_loc, &direct_poll, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#define MMSCH_V5_0_INSERT_END() { \
+ size = sizeof(struct mmsch_v5_0_cmd_end); \
+ size_dw = size / 4; \
+ memcpy((void *)table_loc, &end, size); \
+ table_loc += size_dw; \
+ table_size += size_dw; \
+}
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f5411b798e11..48101a34e049 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -274,6 +274,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
{
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
amdgpu_virt_fini_data_exchange(adev);
@@ -281,8 +282,6 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
@@ -293,6 +292,19 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
}
}
+static void xgpu_ai_mailbox_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -312,26 +324,42 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
- case IDH_FLR_NOTIFICATION:
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_ai_mailbox_send_ack(adev);
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
- &adev->virt.flr_work),
- "Failed to queue work! at %s",
- __func__);
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
break;
- case IDH_QUERY_ALIVE:
- xgpu_ai_mailbox_send_ack(adev);
- break;
- /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
- * it byfar since that polling thread will handle it,
- * other msg like flr complete is not handled here.
- */
- case IDH_CLR_MSG_BUF:
- case IDH_FLR_NOTIFICATION_CMPL:
- case IDH_READY_TO_ACCESS_GPU:
- default:
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
+ case IDH_QUERY_ALIVE:
+ xgpu_ai_mailbox_send_ack(adev);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
+ * it byfar since that polling thread will handle it,
+ * other msg like flr complete is not handled here.
+ */
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
break;
}
@@ -387,6 +415,7 @@ int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+ INIT_WORK(&adev->virt.bad_pages_work, xgpu_ai_mailbox_bad_pages_work);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index ed57cbc150af..874b9f8f9804 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -40,6 +40,7 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_BAD_PAGES = 205,
};
enum idh_event {
@@ -54,6 +55,9 @@ enum idh_event {
IDH_RAS_POISON_READY,
IDH_PF_SOFT_FLR_NOTIFICATION,
IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index f47bd7ada4d7..f6d8597452ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -61,15 +61,20 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
+ int r = 0;
u32 reg;
reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
- if (reg != event)
+ if (reg == IDH_FAIL)
+ r = -EINVAL;
+ if (reg == IDH_UNRECOV_ERR_NOTIFICATION)
+ r = -ENODEV;
+ else if (reg != event)
return -ENOENT;
xgpu_nv_mailbox_send_ack(adev);
- return 0;
+ return r;
}
static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@@ -100,6 +105,7 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
int r;
uint64_t timeout, now;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
now = (uint64_t)ktime_to_ms(ktime_get());
timeout = now + NV_MAILBOX_POLL_MSG_TIMEDOUT;
@@ -107,8 +113,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)
do {
r = xgpu_nv_mailbox_rcv_msg(adev, event);
if (!r) {
- dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
+ dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n",
+ event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);
return 0;
+ } else if (r == -ENODEV) {
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. "
+ "Runtime Services are halted.\n");
+ }
+ return r;
}
msleep(10);
@@ -163,6 +177,10 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
enum idh_event event = -1;
send_request:
+
+ if (amdgpu_ras_is_rma(adev))
+ return -ENODEV;
+
xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3);
switch (req) {
@@ -178,6 +196,15 @@ send_request:
if (data1 != 0)
event = IDH_RAS_POISON_READY;
break;
+ case IDH_REQ_RAS_ERROR_COUNT:
+ event = IDH_RAS_ERROR_COUNT_READY;
+ break;
+ case IDH_REQ_RAS_CPER_DUMP:
+ event = IDH_RAS_CPER_DUMP_READY;
+ break;
+ case IDH_REQ_RAS_BAD_PAGES:
+ event = IDH_RAS_BAD_PAGES_READY;
+ break;
default:
break;
}
@@ -311,6 +338,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
{
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+ struct amdgpu_reset_context reset_context = { 0 };
amdgpu_virt_fini_data_exchange(adev);
@@ -321,8 +349,6 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
- struct amdgpu_reset_context reset_context;
- memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
@@ -333,6 +359,19 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
}
}
+static void xgpu_nv_mailbox_bad_pages_work(struct work_struct *work)
+{
+ struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, bad_pages_work);
+ struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+ if (down_read_trylock(&adev->reset_domain->sem)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ amdgpu_virt_request_bad_pages(adev);
+ amdgpu_virt_init_data_exchange(adev);
+ up_read(&adev->reset_domain->sem);
+ }
+}
+
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
@@ -355,8 +394,27 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
enum idh_event event = xgpu_nv_mailbox_peek_msg(adev);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
switch (event) {
+ case IDH_RAS_BAD_PAGES_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.bad_pages_work);
+ break;
+ case IDH_UNRECOV_ERR_NOTIFICATION:
+ xgpu_nv_mailbox_send_ack(adev);
+ if (!amdgpu_ras_is_rma(adev)) {
+ ras->is_rma = true;
+ dev_err(adev->dev, "VF is in an unrecoverable state. Runtime Services are halted.\n");
+ }
+
+ if (amdgpu_sriov_runtime(adev))
+ WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work),
+ "Failed to queue work! at %s",
+ __func__);
+ break;
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
@@ -427,6 +485,7 @@ int xgpu_nv_mailbox_get_irq(struct amdgpu_device *adev)
}
INIT_WORK(&adev->virt.flr_work, xgpu_nv_mailbox_flr_work);
+ INIT_WORK(&adev->virt.bad_pages_work, xgpu_nv_mailbox_bad_pages_work);
return 0;
}
@@ -456,6 +515,26 @@ static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
}
+static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
+}
+
+static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
+{
+ uint32_t vf_rptr_hi, vf_rptr_lo;
+
+ vf_rptr_hi = (uint32_t)(vf_rptr >> 32);
+ vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF);
+ return xgpu_nv_send_access_requests_with_param(
+ adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0);
+}
+
+static int xgpu_nv_req_ras_bad_pages(struct amdgpu_device *adev)
+{
+ return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_BAD_PAGES);
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
@@ -466,4 +545,7 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.trans_msg = xgpu_nv_mailbox_trans_msg,
.ras_poison_handler = xgpu_nv_ras_poison_handler,
.rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
+ .req_ras_err_count = xgpu_nv_req_ras_err_count,
+ .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
+ .req_bad_pages = xgpu_nv_req_ras_bad_pages,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 1d099ffb3a5a..5808689562cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -40,6 +40,9 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
IDH_RAS_POISON = 202,
+ IDH_REQ_RAS_ERROR_COUNT = 203,
+ IDH_REQ_RAS_CPER_DUMP = 204,
+ IDH_REQ_RAS_BAD_PAGES = 205,
};
enum idh_event {
@@ -54,6 +57,12 @@ enum idh_event {
IDH_RAS_POISON_READY,
IDH_PF_SOFT_FLR_NOTIFICATION,
IDH_RAS_ERROR_DETECTED,
+ IDH_RAS_ERROR_COUNT_READY = 11,
+ IDH_RAS_CPER_DUMP_READY = 14,
+ IDH_RAS_BAD_PAGES_READY = 15,
+ IDH_RAS_BAD_PAGES_NOTIFICATION = 16,
+ IDH_UNRECOV_ERR_NOTIFICATION = 17,
+
IDH_TEXT_MESSAGE = 255,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index b281462093f1..4cd325149b63 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catch up.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -542,19 +541,19 @@ static void navi10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &navi10_ih_self_irq_funcs;
}
-static int navi10_ih_early_init(void *handle)
+static int navi10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_set_interrupt_funcs(adev);
navi10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int navi10_ih_sw_init(void *handle)
+static int navi10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -593,58 +592,52 @@ static int navi10_ih_sw_init(void *handle)
return r;
}
-static int navi10_ih_sw_fini(void *handle)
+static int navi10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int navi10_ih_hw_init(void *handle)
+static int navi10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return navi10_ih_irq_init(adev);
}
-static int navi10_ih_hw_fini(void *handle)
+static int navi10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- navi10_ih_irq_disable(adev);
+ navi10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int navi10_ih_suspend(void *handle)
+static int navi10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_fini(adev);
+ return navi10_ih_hw_fini(ip_block);
}
-static int navi10_ih_resume(void *handle)
+static int navi10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return navi10_ih_hw_init(adev);
+ return navi10_ih_hw_init(ip_block);
}
-static bool navi10_ih_is_idle(void *handle)
+static bool navi10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int navi10_ih_wait_for_idle(void *handle)
+static int navi10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int navi10_ih_soft_reset(void *handle)
+static int navi10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
@@ -673,25 +666,25 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int navi10_ih_set_clockgating_state(void *handle,
+static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
navi10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
-static int navi10_ih_set_powergating_state(void *handle,
+static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void navi10_ih_get_clockgating_state(void *handle, u64 *flags)
+static void navi10_ih_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
*flags |= AMD_CG_SUPPORT_IH_CG;
@@ -700,7 +693,6 @@ static void navi10_ih_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.name = "navi10_ih",
.early_init = navi10_ih_early_init,
- .late_init = NULL,
.sw_init = navi10_ih_sw_init,
.sw_fini = navi10_ih_sw_fini,
.hw_init = navi10_ih_hw_init,
@@ -713,8 +705,6 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.set_clockgating_state = navi10_ih_set_clockgating_state,
.set_powergating_state = navi10_ih_set_powergating_state,
.get_clockgating_state = navi10_ih_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs navi10_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
index a5b60c9a2418..c88284ff92d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -68,6 +68,7 @@
#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define SDMA_SUBOP_VM_INVALIDATION 4
#define HEADER_AGENT_DISPATCH 4
#define HEADER_BARRIER 5
#define SDMA_OP_AQL_COPY 0
@@ -4041,6 +4042,69 @@
/*
+** Definitions for SDMA_PKT_VM_INVALIDATION packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
+#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
+
+/*define for gfx_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift 16
+#define SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_gfx_eng_id_shift)
+
+/*define for mm_eng_id field*/
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_offset 0
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift 24
+#define SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(x) (((x) & SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mm_eng_id_shift)
+
+/*define for INVALIDATEREQ word*/
+/*define for invalidatereq field*/
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
+#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) (((x) & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
+
+/*define for ADDRESSRANGELO word*/
+/*define for addressrangelo field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
+
+/*define for ADDRESSRANGEHI word*/
+/*define for invalidateack field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
+
+/*define for addressrangehi field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
+
+/*define for reserved field*/
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
+#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) (((x) & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
+
+
+/*
** Definitions for SDMA_PKT_ATOMIC packet
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
index 39919e0892c1..9b4025c39e44 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c
@@ -21,13 +21,13 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbif_v6_3_1.h"
#include "nbif/nbif_6_3_1_offset.h"
#include "nbif/nbif_6_3_1_sh_mask.h"
#include "pcie/pcie_6_1_0_offset.h"
#include "pcie/pcie_6_1_0_sh_mask.h"
+#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include <uapi/linux/kfd_ioctl.h>
static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev)
@@ -473,48 +473,82 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
};
-static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev,
- bool use_doorbell, int doorbell_index)
+static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
{
+ /* The ras_controller_irq enablement should be done in psp bl when it
+ * tries to enable ras feature. Driver only need to set the correct interrupt
+ * vector for bare-metal and sriov use case respectively
+ */
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE,
+ (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+
+ return 0;
}
-static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev,
- int instance, bool use_doorbell,
- int doorbell_index,
- int doorbell_size)
+static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
{
+ /* By design, the ih cookie for err_event_athub_irq should be written
+ * to bif ring. since bif ring is not enabled, just leave process callback
+ * as a dummy one.
+ */
+ return 0;
}
-static void nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev,
- bool use_doorbell,
- int doorbell_index, int instance)
+static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = {
+ .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state,
+ .process = nbif_v6_3_1_process_err_event_athub_irq,
+};
+
+static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev)
{
+ uint32_t bif_doorbell_int_cntl;
+
+ bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+ if (REG_GET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+ /* driver has to clear the interrupt status when bif ring is disabled */
+ bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl,
+ BIF_BX0_BIF_DOORBELL_INT_CNTL,
+ RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1);
+ WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl);
+ amdgpu_ras_global_ras_isr(adev);
+ }
}
-static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev)
+static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev)
{
+ int r;
+
+ /* init the irq funcs */
+ adev->nbio.ras_err_event_athub_irq.funcs =
+ &nbif_v6_3_1_ras_err_event_athub_irq_funcs;
+ adev->nbio.ras_err_event_athub_irq.num_types = 1;
+
+ /* register ras err event athub interrupt
+ * nbif v6_3_1 uses the same irq source as nbio v7_4
+ */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF,
+ NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT,
+ &adev->nbio.ras_err_event_athub_irq);
+
+ return r;
}
-const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = {
- .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
- .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
- .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset,
- .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset,
- .get_rev_id = nbif_v6_3_1_get_rev_id,
- .mc_access_enable = nbif_v6_3_1_mc_access_enable,
- .get_memsize = nbif_v6_3_1_get_memsize,
- .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range,
- .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range,
- .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init,
- .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture,
- .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture,
- .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range,
- .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating,
- .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep,
- .get_clockgating_state = nbif_v6_3_1_get_clockgating_state,
- .ih_control = nbif_v6_3_1_ih_control,
- .init_registers = nbif_v6_3_1_init_registers,
- .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
- .get_rom_offset = nbif_v6_3_1_get_rom_offset,
- .set_reg_remap = nbif_v6_3_1_set_reg_remap,
+struct amdgpu_nbio_ras nbif_v6_3_1_ras = {
+ .handle_ras_err_event_athub_intr_no_bifring =
+ nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring,
+ .init_ras_err_event_athub_interrupt =
+ nbif_v6_3_1_init_ras_err_event_athub_interrupt,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
index b7f2e0d88905..3afec715a9fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h
@@ -28,6 +28,6 @@
extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg;
extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs;
-extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs;
+extern struct amdgpu_nbio_ras nbif_v6_3_1_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 739fce4fa8fd..04041b398781 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v2_3.h"
#include "nbio/nbio_2_3_default.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index a54052dea8bf..f89e5f40e1a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v4_3.h"
#include "nbio/nbio_4_3_0_offset.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 34180c6070dd..e911368c1aeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v6_1.h"
#include "nbio/nbio_6_1_default.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index b1b57dcc5a73..1569a1e934ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_0.h"
#include "nbio/nbio_7_0_default.h"
@@ -271,8 +270,19 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
+#define regRCC_DEV0_EPF6_STRAP4 0xd304
+#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5
+
static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
{
+ uint32_t data;
+
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(2, 5, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data);
+ break;
+ }
}
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
index 7a9adfda5814..bed5ef4d8788 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_11.h"
#include "nbio/nbio_7_11_0_offset.h"
@@ -275,6 +274,15 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 11, 0):
+ case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
+ case IP_VERSION(7, 11, 3):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
}
static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev,
@@ -352,7 +360,7 @@ static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_BIF_LS;
}
-#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+#define MMIO_REG_HOLE_OFFSET 0x44000
static void nbio_v7_11_set_reg_remap(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index a766e2d90cd0..acc5f363684a 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_2.h"
#include "nbio/nbio_7_2_0_offset.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 8d80df94bd8b..d5002ff931d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_4.h"
#include "amdgpu_ras.h"
@@ -414,8 +413,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device
/* ras_controller_int is dedicated for nbif ras error,
* not the global interrupt for sync flood
*/
- amdgpu_ras_set_fed(adev, true);
- amdgpu_ras_reset_gpu(adev);
+ amdgpu_ras_global_ras_isr(adev);
}
amdgpu_ras_error_data_fini(&err_data);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index fb37e354a9d5..2ee60b8746a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_7.h"
#include "nbio/nbio_7_7_0_offset.h"
@@ -247,6 +246,12 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data);
+ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
+ case IP_VERSION(7, 7, 0):
+ data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23);
+ WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data);
+ break;
+ }
}
static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index d1bd79bbae53..a376f072700d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "nbio_v7_9.h"
#include "amdgpu_ras.h"
@@ -178,8 +177,12 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
{
u32 doorbell_range = 0, doorbell_ctrl = 0;
u32 aid_id = instance;
+ u32 range_size;
if (use_doorbell) {
+ range_size = (amdgpu_ip_version(adev, GC_HWIP, 0) ==
+ IP_VERSION(9, 5, 0)) ?
+ 0xb : 0x9;
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_OFFSET_ENTRY,
@@ -187,7 +190,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
BIF_DOORBELL0_RANGE_SIZE_ENTRY,
- 0x9);
+ range_size);
if (aid_id)
doorbell_range = REG_SET_FIELD(doorbell_range,
DOORBELL0_CTRL_ENTRY_0,
@@ -205,7 +208,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do
S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
- S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9);
+ S2A_DOORBELL_PORT1_RANGE_SIZE, range_size);
doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl,
S2A_DOORBELL_ENTRY_1_CTRL,
S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4);
@@ -401,6 +404,17 @@ static int nbio_v7_9_get_compute_partition_mode(struct amdgpu_device *adev)
return px;
}
+static bool nbio_v7_9_is_nps_switch_requested(struct amdgpu_device *adev)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS);
+ tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS,
+ CHANGE_STATUE);
+
+ /* 0x8 - NPS switch requested */
+ return (tmp == 0x8);
+}
static u32 nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev,
u32 *supp_modes)
{
@@ -508,6 +522,7 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.remap_hdp_registers = nbio_v7_9_remap_hdp_registers,
.get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode,
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
+ .is_nps_switch_requested = nbio_v7_9_is_nps_switch_requested,
.init_registers = nbio_v7_9_init_registers,
.get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count,
.set_reg_remap = nbio_v7_9_set_reg_remap,
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 4938e6b340e9..50e77d9b30af 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -67,8 +67,8 @@ static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs nv_video_codecs_encode = {
@@ -78,12 +78,12 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = {
/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -94,8 +94,8 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = {
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs sc_video_codecs_encode = {
@@ -104,10 +104,10 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = {
};
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -115,10 +115,10 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[]
};
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -136,28 +136,28 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = {
/* SRIOV Sienna Cichlid, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -454,6 +454,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 3):
case IP_VERSION(13, 0, 5):
@@ -634,9 +635,9 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = {
.query_video_codecs = &nv_query_video_codecs,
};
-static int nv_common_early_init(void *handle)
+static int nv_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
@@ -944,9 +945,9 @@ static int nv_common_early_init(void *handle)
return 0;
}
-static int nv_common_late_init(void *handle)
+static int nv_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
@@ -973,9 +974,9 @@ static int nv_common_late_init(void *handle)
return 0;
}
-static int nv_common_sw_init(void *handle)
+static int nv_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -983,14 +984,9 @@ static int nv_common_sw_init(void *handle)
return 0;
}
-static int nv_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int nv_common_hw_init(void *handle)
+static int nv_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->nbio.funcs->apply_lc_spc_mode_wa)
adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
@@ -1014,9 +1010,9 @@ static int nv_common_hw_init(void *handle)
return 0;
}
-static int nv_common_hw_fini(void *handle)
+static int nv_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because nv_enable_doorbell_aperture
@@ -1029,39 +1025,25 @@ static int nv_common_hw_fini(void *handle)
return 0;
}
-static int nv_common_suspend(void *handle)
+static int nv_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_fini(adev);
+ return nv_common_hw_fini(ip_block);
}
-static int nv_common_resume(void *handle)
+static int nv_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return nv_common_hw_init(adev);
+ return nv_common_hw_init(ip_block);
}
-static bool nv_common_is_idle(void *handle)
+static bool nv_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int nv_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int nv_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int nv_common_set_clockgating_state(void *handle,
+static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1089,16 +1071,16 @@ static int nv_common_set_clockgating_state(void *handle,
return 0;
}
-static int nv_common_set_powergating_state(void *handle,
+static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* TODO */
return 0;
}
-static void nv_common_get_clockgating_state(void *handle, u64 *flags)
+static void nv_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
*flags = 0;
@@ -1115,17 +1097,12 @@ static const struct amd_ip_funcs nv_common_ip_funcs = {
.early_init = nv_common_early_init,
.late_init = nv_common_late_init,
.sw_init = nv_common_sw_init,
- .sw_fini = nv_common_sw_fini,
.hw_init = nv_common_hw_init,
.hw_fini = nv_common_hw_fini,
.suspend = nv_common_suspend,
.resume = nv_common_resume,
.is_idle = nv_common_is_idle,
- .wait_for_idle = nv_common_wait_for_idle,
- .soft_reset = nv_common_soft_reset,
.set_clockgating_state = nv_common_set_clockgating_state,
.set_powergating_state = nv_common_set_powergating_state,
.get_clockgating_state = nv_common_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index 631dafb92299..56f1bfac0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -64,6 +64,24 @@
#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
@@ -105,6 +123,38 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -135,6 +185,42 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -144,8 +230,94 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
#define PACKET3_COND_INDIRECT_BUFFER 0x3F
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_CP_DMA 0x41
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_SURFACE_SYNC 0x43
@@ -160,6 +332,23 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
#define PACKET3_EVENT_WRITE_EOP 0x47
#define PACKET3_EVENT_WRITE_EOS 0x48
#define PACKET3_RELEASE_MEM 0x49
@@ -304,6 +493,12 @@
* 2: REVERSE
*/
#define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
#define PACKET3_REWIND 0x59
#define PACKET3_INTERRUPT 0x5A
#define PACKET3_GEN_PDEPTE 0x5B
@@ -330,11 +525,17 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
#define PACKET3_FORWARD_HEADER 0x7C
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
@@ -369,6 +570,7 @@
# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_AQL_PACKET 0x99
#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
#define PACKET3_SET_SH_REG_INDEX 0x9B
@@ -462,6 +664,12 @@
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
#define PACKET3_RUN_LIST 0xA5
#define PACKET3_MAP_PROCESS_VM 0xA6
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
/* GFX11 */
#define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
# define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 37b5ddd6f13b..f4a91b126c73 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -103,6 +103,10 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
GFX_CMD_ID_BOOT_CFG = 0x00000022, /* Boot Config */
GFX_CMD_ID_SRIOV_SPATIAL_PART = 0x00000027, /* Configure spatial partitioning mode */
+ /*IDs of performance monitoring/profiling*/
+ GFX_CMD_ID_CONFIG_SQ_PERFMON = 0x00000046, /* Config CGTT_SQ_CLK_CTRL */
+ /* Dynamic memory partitioninig (NPS mode change)*/
+ GFX_CMD_ID_FB_NPS_MODE = 0x00000048, /* Configure memory partitioning mode */
};
/* PSP boot config sub-commands */
@@ -351,6 +355,20 @@ struct psp_gfx_cmd_sriov_spatial_part {
uint32_t override_this_aid;
};
+/*Structure for sq performance monitoring/profiling enable/disable*/
+struct psp_gfx_cmd_config_sq_perfmon {
+ uint32_t gfx_xcp_mask;
+ uint8_t core_override;
+ uint8_t reg_override;
+ uint8_t perfmon_override;
+ uint8_t reserved[5];
+};
+
+struct psp_gfx_cmd_fb_memory_part {
+ uint32_t mode; /* requested NPS mode */
+ uint32_t resvd;
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -365,6 +383,8 @@ union psp_gfx_commands
struct psp_gfx_cmd_load_toc cmd_load_toc;
struct psp_gfx_cmd_boot_cfg boot_cfg;
struct psp_gfx_cmd_sriov_spatial_part cmd_spatial_part;
+ struct psp_gfx_cmd_config_sq_perfmon config_sq_perfmon;
+ struct psp_gfx_cmd_fb_memory_part cmd_memory_part;
};
struct psp_gfx_uresp_reserved
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 2395f1856962..215543575f47 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -129,6 +129,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
err = psp_init_ta_microcode(psp, ucode_prefix);
break;
case IP_VERSION(11, 5, 0):
+ case IP_VERSION(11, 5, 2):
err = psp_init_asd_microcode(psp, ucode_prefix);
if (err)
return err;
@@ -532,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index fcd708eae75c..80153f837470 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -34,9 +34,6 @@
#include "sdma0/sdma0_4_0_offset.h"
#include "nbio/nbio_7_4_offset.h"
-#include "oss/osssys_4_0_offset.h"
-#include "oss/osssys_4_0_sh_mask.h"
-
MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
MODULE_FIRMWARE("amdgpu/renoir_ta.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin");
@@ -99,9 +96,6 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
0x80000000, 0x80000000, false);
@@ -138,8 +132,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
0, true);
@@ -147,37 +139,6 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp)
return ret;
}
-static void psp_v12_0_reroute_ih(struct psp_context *psp)
-{
- struct amdgpu_device *adev = psp->adev;
- uint32_t tmp;
-
- /* Change IH ring for VMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-
- /* Change IH ring for UMC */
- tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b);
- tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
-
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET);
-
- mdelay(20);
- psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
-}
-
static int psp_v12_0_ring_create(struct psp_context *psp,
enum psp_ring_type ring_type)
{
@@ -186,49 +147,23 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
struct psp_ring *ring = &psp->km_ring;
struct amdgpu_device *adev = psp->adev;
- psp_v12_0_reroute_ih(psp);
-
- if (amdgpu_sriov_vf(psp->adev)) {
- /* Write low address of the ring to C2PMSG_102 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_103 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
-
- /* Write the ring initialization command to C2PMSG_101 */
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
- GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_101 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
- 0x80000000, 0x8000FFFF, false);
-
- } else {
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
- }
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
return ret;
}
@@ -247,9 +182,6 @@ static int psp_v12_0_ring_stop(struct psp_context *psp,
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
GFX_CTRL_CMD_ID_DESTROY_RINGS);
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
/* Wait for response flag (bit 31) */
if (amdgpu_sriov_vf(adev))
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 51e470e8d67d..ead616c11705 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
@@ -51,6 +53,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin");
@@ -69,20 +73,13 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_4_ta.bin");
/* Retry times for vmbx ready wait */
#define PSP_VMBX_POLLING_LIMIT 3000
-/* VBIOS gfl defines */
-#define MBOX_READY_MASK 0x80000000
-#define MBOX_STATUS_MASK 0x0000FFFF
-#define MBOX_COMMAND_MASK 0x00FF0000
-#define MBOX_READY_FLAG 0x80000000
-#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2
-#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
-#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
-
/* memory training timeout define */
#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
#define regMP1_PUB_SCRATCH0 0x3b10090
+#define PSP13_BL_STATUS_SIZE 100
+
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -122,6 +119,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
+ case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
err = psp_init_sos_microcode(psp, ucode_prefix);
if (err)
@@ -148,6 +146,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
return sol_reg != 0x0;
}
+static void psp_v13_0_bootloader_print_status(struct psp_context *psp,
+ const char *msg)
+{
+ struct amdgpu_device *adev = psp->adev;
+ u32 bl_status_reg;
+ char bl_status_msg[PSP13_BL_STATUS_SIZE];
+ int i, at;
+
+ if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
+ at = 0;
+ for_each_inst(i, adev->aid_mask) {
+ bl_status_reg =
+ (SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_92)
+ << 2) +
+ adev->asic_funcs->encode_ext_smn_addressing(i);
+ at += snprintf(bl_status_msg + at,
+ PSP13_BL_STATUS_SIZE - at,
+ " status(%02i): 0x%08x", i,
+ RREG32_PCIE_EXT(bl_status_reg));
+ }
+ dev_info(adev->dev, "%s - %s", msg, bl_status_msg);
+ }
+}
+
static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -177,6 +201,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
retry_cnt =
((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ?
PSP_VMBX_POLLING_LIMIT :
10;
@@ -192,6 +217,9 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
if (ret == 0)
return 0;
+ if (retry_loop && !(retry_loop % 10))
+ psp_v13_0_bootloader_print_status(
+ psp, "Waiting for bootloader completion");
}
return ret;
@@ -203,6 +231,7 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
int ret;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
ret = psp_v13_0_wait_for_vmbx_ready(psp);
if (ret)
@@ -288,6 +317,11 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp)
return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
}
+static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp)
+{
+ return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV);
+}
+
static inline void psp_v13_0_init_sos_version(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -600,7 +634,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
@@ -700,7 +734,8 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd)
/* Ring the doorbell */
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_73, 1);
- if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE)
+ if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE ||
+ cmd == C2PMSG_CMD_SPI_GET_FLASH_IMAGE)
ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT);
else
@@ -756,6 +791,37 @@ static int psp_v13_0_update_spirom(struct psp_context *psp,
return 0;
}
+static int psp_v13_0_dump_spirom(struct psp_context *psp,
+ uint64_t fw_pri_mc_addr)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret;
+
+ /* Confirm PSP is ready to start */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115),
+ MBOX_READY_FLAG, MBOX_READY_MASK, false);
+ if (ret) {
+ dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret);
+ return ret;
+ }
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_LO);
+ if (ret)
+ return ret;
+
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr));
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_ROM_IMAGE_ADDR_HI);
+ if (ret)
+ return ret;
+
+ ret = psp_v13_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_GET_FLASH_IMAGE);
+
+ return ret;
+}
+
static int psp_v13_0_vbflash_status(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -798,6 +864,7 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp)
return false;
if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) &&
(!(adev->flags & AMD_IS_APU))) {
reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127);
@@ -823,6 +890,49 @@ static bool psp_v13_0_is_aux_sos_load_required(struct psp_context *psp)
return (pmfw_ver < 0x557300);
}
+static bool psp_v13_0_is_reload_needed(struct psp_context *psp)
+{
+ uint32_t ucode_ver;
+
+ if (!psp_v13_0_is_sos_alive(psp))
+ return false;
+
+ /* Restrict reload support only to specific IP versions */
+ switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
+ case IP_VERSION(13, 0, 14):
+ /* TOS version read from microcode header */
+ ucode_ver = psp->sos.fw_version;
+ /* Read TOS version from hardware */
+ psp_v13_0_init_sos_version(psp);
+ return (ucode_ver != psp->sos.fw_version);
+ default:
+ return false;
+ }
+
+ return false;
+}
+
+static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val,
+ enum psp_reg_prog_id id)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int ret = -EOPNOTSUPP;
+
+ /* PSP will broadcast the value to all instances */
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_GBR_IH_SET);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ }
+
+ return ret;
+}
+
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -833,6 +943,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
.bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
+ .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv,
.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
.ring_create = psp_v13_0_ring_create,
.ring_stop = psp_v13_0_ring_stop,
@@ -843,10 +954,13 @@ static const struct psp_funcs psp_v13_0_funcs = {
.load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
.read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
.update_spirom = psp_v13_0_update_spirom,
+ .dump_spirom = psp_v13_0_dump_spirom,
.vbflash_stat = psp_v13_0_vbflash_status,
.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
.get_ras_capability = psp_v13_0_get_ras_capability,
.is_aux_sos_load_required = psp_v13_0_is_aux_sos_load_required,
+ .is_reload_needed = psp_v13_0_is_reload_needed,
+ .reg_program_no_ring = psp_v13_0_reg_program_no_ring,
};
void psp_v13_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
index 4d33c95a5116..256288c6cd78 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -35,6 +35,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin");
MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -72,6 +74,14 @@ static int psp_v14_0_init_microcode(struct psp_context *psp)
if (err)
return err;
break;
+ case IP_VERSION(14, 0, 5):
+ err = psp_init_toc_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, ucode_prefix);
+ if (err)
+ return err;
+ break;
default:
BUG();
}
@@ -488,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
- adev->hdp.funcs->flush_hdp(adev, NULL);
+ amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 725392522267..92ce580647cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -145,9 +145,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -631,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -807,9 +809,9 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v2_4_early_init(void *handle)
+static int sdma_v2_4_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
adev->sdma.num_instances = SDMA_MAX_INSTANCE;
@@ -826,11 +828,11 @@ static int sdma_v2_4_early_init(void *handle)
return 0;
}
-static int sdma_v2_4_sw_init(void *handle)
+static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -866,9 +868,9 @@ static int sdma_v2_4_sw_init(void *handle)
return r;
}
-static int sdma_v2_4_sw_fini(void *handle)
+static int sdma_v2_4_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -878,10 +880,10 @@ static int sdma_v2_4_sw_fini(void *handle)
return 0;
}
-static int sdma_v2_4_hw_init(void *handle)
+static int sdma_v2_4_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v2_4_init_golden_registers(adev);
@@ -892,32 +894,26 @@ static int sdma_v2_4_hw_init(void *handle)
return r;
}
-static int sdma_v2_4_hw_fini(void *handle)
+static int sdma_v2_4_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- sdma_v2_4_enable(adev, false);
+ sdma_v2_4_enable(ip_block->adev, false);
return 0;
}
-static int sdma_v2_4_suspend(void *handle)
+static int sdma_v2_4_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_fini(adev);
+ return sdma_v2_4_hw_fini(ip_block);
}
-static int sdma_v2_4_resume(void *handle)
+static int sdma_v2_4_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v2_4_hw_init(adev);
+ return sdma_v2_4_hw_init(ip_block);
}
-static bool sdma_v2_4_is_idle(void *handle)
+static bool sdma_v2_4_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -927,11 +923,11 @@ static bool sdma_v2_4_is_idle(void *handle)
return true;
}
-static int sdma_v2_4_wait_for_idle(void *handle)
+static int sdma_v2_4_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -944,10 +940,10 @@ static int sdma_v2_4_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v2_4_soft_reset(void *handle)
+static int sdma_v2_4_soft_reset(struct amdgpu_ip_block *ip_block)
{
u32 srbm_soft_reset = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
@@ -1086,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v2_4_set_clockgating_state(void *handle,
+static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* XXX handled via the smc on VI */
return 0;
}
-static int sdma_v2_4_set_powergating_state(void *handle,
+static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -1102,7 +1098,6 @@ static int sdma_v2_4_set_powergating_state(void *handle,
static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.name = "sdma_v2_4",
.early_init = sdma_v2_4_early_init,
- .late_init = NULL,
.sw_init = sdma_v2_4_sw_init,
.sw_fini = sdma_v2_4_sw_fini,
.hw_init = sdma_v2_4_hw_init,
@@ -1114,8 +1109,6 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.soft_reset = sdma_v2_4_soft_reset,
.set_clockgating_state = sdma_v2_4_set_clockgating_state,
.set_powergating_state = sdma_v2_4_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index e65194fe94af..1c076bd1cf73 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -305,9 +305,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma.bin", chip_name);
else
err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw,
+ AMDGPU_UCODE_REQUIRED,
"amdgpu/%s_sdma1.bin", chip_name);
if (err)
goto out;
@@ -904,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1080,9 +1082,9 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int sdma_v3_0_early_init(void *handle)
+static int sdma_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
switch (adev->asic_type) {
@@ -1106,11 +1108,11 @@ static int sdma_v3_0_early_init(void *handle)
return 0;
}
-static int sdma_v3_0_sw_init(void *handle)
+static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
@@ -1152,9 +1154,9 @@ static int sdma_v3_0_sw_init(void *handle)
return r;
}
-static int sdma_v3_0_sw_fini(void *handle)
+static int sdma_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -1164,10 +1166,10 @@ static int sdma_v3_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_hw_init(void *handle)
+static int sdma_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_init_golden_registers(adev);
@@ -1178,9 +1180,9 @@ static int sdma_v3_0_hw_init(void *handle)
return r;
}
-static int sdma_v3_0_hw_fini(void *handle)
+static int sdma_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false);
@@ -1188,23 +1190,19 @@ static int sdma_v3_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v3_0_suspend(void *handle)
+static int sdma_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_fini(adev);
+ return sdma_v3_0_hw_fini(ip_block);
}
-static int sdma_v3_0_resume(void *handle)
+static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v3_0_hw_init(adev);
+ return sdma_v3_0_hw_init(ip_block);
}
-static bool sdma_v3_0_is_idle(void *handle)
+static bool sdma_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS2);
if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1214,11 +1212,11 @@ static bool sdma_v3_0_is_idle(void *handle)
return true;
}
-static int sdma_v3_0_wait_for_idle(void *handle)
+static int sdma_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
@@ -1231,9 +1229,9 @@ static int sdma_v3_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool sdma_v3_0_check_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS2);
@@ -1252,9 +1250,9 @@ static bool sdma_v3_0_check_soft_reset(void *handle)
}
}
-static int sdma_v3_0_pre_soft_reset(void *handle)
+static int sdma_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1271,9 +1269,9 @@ static int sdma_v3_0_pre_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_post_soft_reset(void *handle)
+static int sdma_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
if (!adev->sdma.srbm_soft_reset)
@@ -1290,9 +1288,9 @@ static int sdma_v3_0_post_soft_reset(void *handle)
return 0;
}
-static int sdma_v3_0_soft_reset(void *handle)
+static int sdma_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp;
@@ -1487,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep(
}
}
-static int sdma_v3_0_set_clockgating_state(void *handle,
+static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1510,15 +1508,15 @@ static int sdma_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v3_0_set_powergating_state(void *handle,
+static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1538,7 +1536,6 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.name = "sdma_v3_0",
.early_init = sdma_v3_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v3_0_sw_init,
.sw_fini = sdma_v3_0_sw_fini,
.hw_init = sdma_v3_0_hw_init,
@@ -1554,8 +1551,6 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.set_clockgating_state = sdma_v3_0_set_clockgating_state,
.set_powergating_state = sdma_v3_0_set_powergating_state,
.get_clockgating_state = sdma_v3_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 23ef4eb36b40..33ed2b158fcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1565,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1751,9 +1751,9 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
}
}
-static int sdma_v4_0_early_init(void *handle)
+static int sdma_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v4_0_init_microcode(adev);
@@ -1780,9 +1780,9 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
void *err_data,
struct amdgpu_iv_entry *entry);
-static int sdma_v4_0_late_init(void *handle)
+static int sdma_v4_0_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v4_0_setup_ulv(adev);
@@ -1792,11 +1792,11 @@ static int sdma_v4_0_late_init(void *handle)
return 0;
}
-static int sdma_v4_0_sw_init(void *handle)
+static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
uint32_t *ptr;
@@ -1929,9 +1929,9 @@ static int sdma_v4_0_sw_init(void *handle)
return r;
}
-static int sdma_v4_0_sw_fini(void *handle)
+static int sdma_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1951,12 +1951,12 @@ static int sdma_v4_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v4_0_hw_init(void *handle)
+static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_0_init_golden_registers(adev);
@@ -1964,9 +1964,9 @@ static int sdma_v4_0_hw_init(void *handle)
return sdma_v4_0_start(adev);
}
-static int sdma_v4_0_hw_fini(void *handle)
+static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
if (amdgpu_sriov_vf(adev))
@@ -1983,14 +1983,14 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_enable(adev, false);
if (adev->flags & AMD_IS_APU)
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0);
return 0;
}
-static int sdma_v4_0_suspend(void *handle)
+static int sdma_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SMU saves SDMA state for us */
if (adev->in_s0ix) {
@@ -1998,12 +1998,12 @@ static int sdma_v4_0_suspend(void *handle)
return 0;
}
- return sdma_v4_0_hw_fini(adev);
+ return sdma_v4_0_hw_fini(ip_block);
}
-static int sdma_v4_0_resume(void *handle)
+static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* SMU restores SDMA state for us */
if (adev->in_s0ix) {
@@ -2012,12 +2012,12 @@ static int sdma_v4_0_resume(void *handle)
return 0;
}
- return sdma_v4_0_hw_init(adev);
+ return sdma_v4_0_hw_init(ip_block);
}
-static bool sdma_v4_0_is_idle(void *handle)
+static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -2030,11 +2030,11 @@ static bool sdma_v4_0_is_idle(void *handle)
return true;
}
-static int sdma_v4_0_wait_for_idle(void *handle)
+static int sdma_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -2049,7 +2049,7 @@ static int sdma_v4_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_0_soft_reset(void *handle)
+static int sdma_v4_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -2297,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
}
}
-static int sdma_v4_0_set_clockgating_state(void *handle,
+static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2312,10 +2312,10 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_0_set_powergating_state(void *handle,
+static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
case IP_VERSION(4, 1, 0):
@@ -2331,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2350,9 +2350,9 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
uint32_t instance_offset;
@@ -2371,9 +2371,9 @@ static void sdma_v4_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v4_0_dump_ip_state(void *handle)
+static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_0);
@@ -2381,7 +2381,6 @@ static void sdma_v4_0_dump_ip_state(void *handle)
if (!adev->sdma.ip_dump)
return;
- amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
instance_offset = i * reg_count;
for (j = 0; j < reg_count; j++)
@@ -2389,7 +2388,6 @@ static void sdma_v4_0_dump_ip_state(void *handle)
RREG32(sdma_v4_0_get_reg_offset(adev, i,
sdma_reg_list_4_0[j].reg_offset));
}
- amdgpu_gfx_off_ctrl(adev, true);
}
const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index c77889040760..cef68df4c663 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -30,6 +30,7 @@
#include "amdgpu_xcp.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"
@@ -105,6 +106,9 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev);
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev);
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring);
static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
@@ -189,6 +193,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
ret = amdgpu_sdma_init_microcode(adev, 0, true);
break;
@@ -485,7 +490,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
{
struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
u32 doorbell_offset, doorbell;
- u32 rb_cntl, ib_cntl;
+ u32 rb_cntl, ib_cntl, sdma_cntl;
int i;
for_each_inst(i, inst_mask) {
@@ -497,6 +502,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+ sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0);
+ WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
if (sdma[i]->use_doorbell) {
doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
@@ -667,11 +675,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: used to restore wptr when restart
*
* Set up the gfx DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -679,6 +688,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
+ u64 rwptr;
wb_offset = (ring->rptr_offs * 4);
@@ -698,16 +708,32 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);
+ /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+ * It is not a guilty queue, restore cache_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].gfx_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
/* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);
+ }
doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);
@@ -755,11 +781,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
*
* @adev: amdgpu_device pointer
* @i: instance to resume
+ * @restore: boolean to say restore needed or not
*
* Set up the page DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
-static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
+static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
@@ -767,6 +794,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
u32 doorbell;
u32 doorbell_offset;
u64 wptr_gpu_addr;
+ u64 rwptr;
wb_offset = (ring->rptr_offs * 4);
@@ -774,11 +802,26 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
+ /* For the guilty queue, set RPTR to the current wptr to skip bad commands,
+ * It is not a guilty queue, restore cache_rptr and continue execution.
+ */
+ if (adev->sdma.instance[i].page_guilty)
+ rwptr = ring->wptr;
+ else
+ rwptr = ring->cached_rptr;
+
/* Initialize the ring buffer's read and write pointers */
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
- WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ if (restore) {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2));
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2));
+ } else {
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
+ WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);
+ }
/* set the wb address whether it's enabled or not */
WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
@@ -792,7 +835,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
- ring->wptr = 0;
+ if (!restore)
+ ring->wptr = 0;
/* before programing wptr to a less value, need set minor_ptr_update first */
WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);
@@ -911,12 +955,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @inst_mask: mask of dma engine instances to be enabled
+ * @restore: boolean to say restore needed or not
*
* Set up the DMA engines and enable them.
* Returns 0 for success, error for failure.
*/
static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
- uint32_t inst_mask)
+ uint32_t inst_mask, bool restore)
{
struct amdgpu_ring *ring;
uint32_t tmp_mask;
@@ -927,7 +972,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
sdma_v4_4_2_inst_enable(adev, false, inst_mask);
} else {
/* bypass sdma microcode loading on Gopher */
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
+ if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
adev->sdma.instance[0].fw) {
r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
if (r)
@@ -946,17 +991,20 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
uint32_t temp;
WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
- sdma_v4_4_2_gfx_resume(adev, i);
+ sdma_v4_4_2_gfx_resume(adev, i, restore);
if (adev->sdma.has_page_queue)
- sdma_v4_4_2_page_resume(adev, i);
+ sdma_v4_4_2_page_resume(adev, i, restore);
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
- /* enable context empty interrupt during initialization */
- temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
WREG32_SDMA(i, regSDMA_CNTL, temp);
+ if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) {
+ /* enable context empty interrupt during initialization */
+ temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1);
+ WREG32_SDMA(i, regSDMA_CNTL, temp);
+ }
if (!amdgpu_sriov_vf(adev)) {
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
/* unhalt engine */
@@ -1110,7 +1158,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -1290,9 +1338,14 @@ static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
}
}
-static int sdma_v4_4_2_early_init(void *handle)
+static const struct amdgpu_sdma_funcs sdma_v4_4_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v4_4_2_stop_queue,
+ .start_kernel_queue = &sdma_v4_4_2_restore_queue,
+};
+
+static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v4_4_2_init_microcode(adev);
@@ -1308,7 +1361,6 @@ static int sdma_v4_4_2_early_init(void *handle)
sdma_v4_4_2_set_vm_pte_funcs(adev);
sdma_v4_4_2_set_irq_funcs(adev);
sdma_v4_4_2_set_ras_funcs(adev);
-
return 0;
}
@@ -1318,9 +1370,9 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
#endif
-static int sdma_v4_4_2_late_init(void *handle)
+static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
#if 0
struct ras_ih_if ih_info = {
.cb = sdma_v4_4_2_process_ras_data_cb,
@@ -1329,14 +1381,20 @@ static int sdma_v4_4_2_late_init(void *handle)
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA);
+ /* The initialization is done in the late_init stage to ensure that the SMU
+ * initialization and capability setup are completed before we check the SDMA
+ * reset capability
+ */
+ sdma_v4_4_2_update_reset_mask(adev);
+
return 0;
}
-static int sdma_v4_4_2_sw_init(void *handle)
+static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 aid_id;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
uint32_t *ptr;
@@ -1384,9 +1442,21 @@ static int sdma_v4_4_2_sw_init(void *handle)
&adev->sdma.srbm_write_irq);
if (r)
return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_CTXEMPTY,
+ &adev->sdma.ctxt_empty_irq);
+ if (r)
+ return r;
}
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ /* Initialize guilty flags for GFX and PAGE queues */
+ adev->sdma.instance[i].gfx_guilty = false;
+ adev->sdma.instance[i].page_guilty = false;
+ adev->sdma.instance[i].funcs = &sdma_v4_4_2_sdma_funcs;
+
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1430,6 +1500,9 @@ static int sdma_v4_4_2_sw_init(void *handle)
}
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+
if (amdgpu_sdma_ras_sw_init(adev)) {
dev_err(adev->dev, "fail to initialize sdma ras block\n");
return -EINVAL;
@@ -1442,12 +1515,16 @@ static int sdma_v4_4_2_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v4_4_2_sw_fini(void *handle)
+static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1456,7 +1533,9 @@ static int sdma_v4_4_2_sw_fini(void *handle)
amdgpu_ring_fini(&adev->sdma.instance[i].page);
}
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5))
amdgpu_sdma_destroy_inst_ctx(adev, true);
else
@@ -1467,24 +1546,24 @@ static int sdma_v4_4_2_sw_fini(void *handle)
return 0;
}
-static int sdma_v4_4_2_hw_init(void *handle)
+static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
-static int sdma_v4_4_2_hw_fini(void *handle)
+static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
int i;
@@ -1505,29 +1584,27 @@ static int sdma_v4_4_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
-static int sdma_v4_4_2_suspend(void *handle)
+static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_in_reset(adev))
- sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
- return sdma_v4_4_2_hw_fini(adev);
+ return sdma_v4_4_2_hw_fini(ip_block);
}
-static int sdma_v4_4_2_resume(void *handle)
+static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v4_4_2_hw_init(adev);
+ return sdma_v4_4_2_hw_init(ip_block);
}
-static bool sdma_v4_4_2_is_idle(void *handle)
+static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1540,11 +1617,11 @@ static bool sdma_v4_4_2_is_idle(void *handle)
return true;
}
-static int sdma_v4_4_2_wait_for_idle(void *handle)
+static int sdma_v4_4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i, j;
u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
for (j = 0; j < adev->sdma.num_instances; j++) {
@@ -1559,13 +1636,119 @@ static int sdma_v4_4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v4_4_2_soft_reset(void *handle)
+static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
+static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue)
+{
+ uint32_t reg_offset = is_page_queue ? regSDMA_PAGE_CONTEXT_STATUS : regSDMA_GFX_CONTEXT_STATUS;
+ uint32_t context_status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, reg_offset));
+
+ /* Check if the SELECTED bit is set */
+ return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0;
+}
+
+static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t instance_id = ring->me;
+
+ return sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+}
+
+static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t instance_id = ring->me;
+
+ if (!adev->sdma.has_page_queue)
+ return false;
+
+ return sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+}
+
+static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 id = ring->me;
+ int r;
+
+ if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ return -EOPNOTSUPP;
+
+ amdgpu_amdkfd_suspend(adev, false);
+ r = amdgpu_sdma_reset_engine(adev, id);
+ amdgpu_amdkfd_resume(adev, false);
+
+ return r;
+}
+
+static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 instance_id = ring->me;
+ u32 inst_mask;
+ uint64_t rptr;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ /* Check if this queue is the guilty one */
+ adev->sdma.instance[instance_id].gfx_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
+ if (adev->sdma.has_page_queue)
+ adev->sdma.instance[instance_id].page_guilty =
+ sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
+
+ /* Cache the rptr before reset, after the reset,
+ * all of the registers will be reset to 0
+ */
+ rptr = amdgpu_ring_get_rptr(ring);
+ ring->cached_rptr = rptr;
+ /* Cache the rptr for the page queue if it exists */
+ if (adev->sdma.has_page_queue) {
+ struct amdgpu_ring *page_ring = &adev->sdma.instance[instance_id].page;
+ rptr = amdgpu_ring_get_rptr(page_ring);
+ page_ring->cached_rptr = rptr;
+ }
+
+ /* stop queue */
+ inst_mask = 1 << ring->me;
+ sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
+ if (adev->sdma.has_page_queue)
+ sdma_v4_4_2_inst_page_stop(adev, inst_mask);
+
+ return 0;
+}
+
+static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_mask;
+ int i;
+
+ inst_mask = 1 << ring->me;
+ udelay(50);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT))
+ break;
+ udelay(1);
+ }
+
+ if (i == adev->usec_timeout) {
+ dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n",
+ ring->me);
+ return -ETIMEDOUT;
+ }
+
+ return sdma_v4_4_2_inst_start(adev, inst_mask, true);
+}
+
static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1611,6 +1794,9 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
case 0:
amdgpu_fence_process(&adev->sdma.instance[i].ring);
break;
+ case 1:
+ amdgpu_fence_process(&adev->sdma.instance[i].page);
+ break;
default:
break;
}
@@ -1748,6 +1934,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ /* There is nothing useful to be done here, only kept for debug */
+ dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt");
+ sdma_v4_4_2_print_iv_entry(adev, entry);
+ return 0;
+}
+
static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
{
@@ -1814,10 +2010,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
}
}
-static int sdma_v4_4_2_set_clockgating_state(void *handle,
+static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t inst_mask;
if (amdgpu_sriov_vf(adev))
@@ -1832,15 +2028,15 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v4_4_2_set_powergating_state(void *handle,
+static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1857,9 +2053,9 @@ static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v4_4_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
uint32_t instance_offset;
@@ -1878,9 +2074,9 @@ static void sdma_v4_4_2_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v4_4_2_dump_ip_state(void *handle)
+static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_4_4_2);
@@ -1888,7 +2084,6 @@ static void sdma_v4_4_2_dump_ip_state(void *handle)
if (!adev->sdma.ip_dump)
return;
- amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
instance_offset = i * reg_count;
for (j = 0; j < reg_count; j++)
@@ -1896,7 +2091,6 @@ static void sdma_v4_4_2_dump_ip_state(void *handle)
RREG32(sdma_v4_4_2_get_reg_offset(adev, i,
sdma_reg_list_4_4_2[j].reg_offset));
}
- amdgpu_gfx_off_ctrl(adev, true);
}
const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
@@ -1948,6 +2142,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
+ .is_guilty = sdma_v4_4_2_ring_is_guilty,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
@@ -1979,6 +2175,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = sdma_v4_4_2_reset_queue,
+ .is_guilty = sdma_v4_4_2_page_ring_is_guilty,
};
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
@@ -2031,6 +2229,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
.process = sdma_v4_4_2_process_srbm_write_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = {
+ .process = sdma_v4_4_2_process_ctxt_empty_irq,
+};
+
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
@@ -2039,6 +2241,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
@@ -2047,6 +2250,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
+ adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs;
}
/**
@@ -2144,6 +2348,40 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
+/**
+ * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA
+ * @adev: Pointer to the AMDGPU device structure
+ *
+ * This function update reset mask for SDMA and sets the supported
+ * reset types based on the IP version and firmware versions.
+ *
+ */
+static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
+{
+ /* per queue reset not supported for SRIOV */
+ if (amdgpu_sriov_vf(adev))
+ return;
+
+ /*
+ * the user queue relies on MEC fw and pmfw when the sdma queue do reset.
+ * it needs to check both of them at here to skip old mec and pmfw.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(9, 4, 3):
+ case IP_VERSION(9, 4, 4):
+ if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(9, 5, 0):
+ if ((adev->gfx.mec_fw_version >= 0xf) && amdgpu_dpm_reset_sdma_is_supported(adev))
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
+}
+
const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
.type = AMD_IP_BLOCK_TYPE_SDMA,
.major = 4,
@@ -2160,7 +2398,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
if (!amdgpu_sriov_vf(adev))
sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);
- r = sdma_v4_4_2_inst_start(adev, inst_mask);
+ r = sdma_v4_4_2_inst_start(adev, inst_mask, false);
return r;
}
@@ -2305,11 +2543,13 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban
misc0 = bank->regs[ACA_REG_IDX_MISC0];
switch (type) {
case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
1ULL);
break;
case ACA_SMU_TYPE_CE:
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3e48ea38385d..1813c3ed0aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -112,6 +112,8 @@ static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring);
static const struct soc15_reg_golden golden_settings_sdma_5[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
@@ -369,67 +371,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
-
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- *wptr_saved = ring->wptr << 2;
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
-
- if (*is_queue_unmap)
- WDOORBELL64(aggregated_db_index,
- ring->wptr << 2);
- }
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("Not using doorbell -- "
- "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
- ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
- ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
}
}
@@ -575,11 +546,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -588,15 +557,15 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_0_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers (NAVI10).
*/
-static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
u32 rb_cntl, ib_cntl;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -688,9 +657,11 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_0_gfx_stop(adev);
+ sdma_v5_0_gfx_stop(adev, 1 << inst_mask);
sdma_v5_0_rlc_stop(adev);
}
@@ -705,14 +676,16 @@ static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ * sdma_v5_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them (NAVI10).
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v5_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -722,142 +695,163 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
-
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
- ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
- ring->gpu_addr >> 40);
-
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE),
+ ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI),
+ ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
- mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
- doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET),
+ doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index, 20);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_0_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+ }
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_0_ctx_switch_enable(adev, true);
- sdma_v5_0_enable(adev, true);
- }
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
+
+ return amdgpu_ring_test_helper(ring);
+}
+
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
- r = amdgpu_ring_test_helper(ring);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
}
@@ -1023,33 +1017,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1062,10 +1045,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -1077,8 +1057,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1101,38 +1080,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256,
- AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -1160,10 +1125,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1171,11 +1133,10 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1366,9 +1327,39 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_0_early_init(void *handle)
+static int sdma_v5_0_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
+{
+ u32 grbm_soft_reset;
+ u32 tmp;
+
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
+
+static const struct amdgpu_sdma_funcs sdma_v5_0_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_0_stop_queue,
+ .start_kernel_queue = &sdma_v5_0_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_0_soft_reset_engine,
+};
+
+static int sdma_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = sdma_v5_0_init_microcode(adev);
@@ -1385,11 +1376,11 @@ static int sdma_v5_0_early_init(void *handle)
}
-static int sdma_v5_0_sw_init(void *handle)
+static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
uint32_t *ptr;
@@ -1408,6 +1399,8 @@ static int sdma_v5_0_sw_init(void *handle)
return r;
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1429,6 +1422,19 @@ static int sdma_v5_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 0, 0):
+ case IP_VERSION(5, 0, 2):
+ case IP_VERSION(5, 0, 5):
+ if (adev->sdma.instance[0].fw_version >= 35)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr)
@@ -1436,17 +1442,22 @@ static int sdma_v5_0_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_0_sw_fini(void *handle)
+static int sdma_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, false);
kfree(adev->sdma.ip_dump);
@@ -1454,10 +1465,10 @@ static int sdma_v5_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v5_0_hw_init(void *handle)
+static int sdma_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
sdma_v5_0_init_golden_registers(adev);
@@ -1466,9 +1477,9 @@ static int sdma_v5_0_hw_init(void *handle)
return r;
}
-static int sdma_v5_0_hw_fini(void *handle)
+static int sdma_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1479,23 +1490,19 @@ static int sdma_v5_0_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_0_suspend(void *handle)
+static int sdma_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_fini(adev);
+ return sdma_v5_0_hw_fini(ip_block);
}
-static int sdma_v5_0_resume(void *handle)
+static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_0_hw_init(adev);
+ return sdma_v5_0_hw_init(ip_block);
}
-static bool sdma_v5_0_is_idle(void *handle)
+static bool sdma_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1508,11 +1515,11 @@ static bool sdma_v5_0_is_idle(void *handle)
return true;
}
-static int sdma_v5_0_wait_for_idle(void *handle)
+static int sdma_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1525,13 +1532,88 @@ static int sdma_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int sdma_v5_0_soft_reset(void *handle)
+static int sdma_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
return 0;
}
+static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+
+ return amdgpu_sdma_reset_engine(adev, inst_id);
+}
+
+static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_0_gfx_stop(adev, 1 << i);
+
+ /* engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+ /* check sdma copy engine all idle if frozen not received*/
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze*/
+ freeze = RREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true);
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1729,10 +1811,10 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_0_set_clockgating_state(void *handle,
+static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1753,15 +1835,15 @@ static int sdma_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_0_set_powergating_state(void *handle,
+static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1778,9 +1860,9 @@ static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
-static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
uint32_t instance_offset;
@@ -1799,9 +1881,9 @@ static void sdma_v5_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v5_0_dump_ip_state(void *handle)
+static void sdma_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_0);
@@ -1823,7 +1905,6 @@ static void sdma_v5_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
.name = "sdma_v5_0",
.early_init = sdma_v5_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_0_sw_init,
.sw_fini = sdma_v5_0_sw_fini,
.hw_init = sdma_v5_0_hw_init,
@@ -1874,6 +1955,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
+ .reset = sdma_v5_0_reset_queue,
};
static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index bc9b240a3488..23f97da62808 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -113,6 +113,8 @@ static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring);
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring);
static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@@ -394,11 +396,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if ((flags & AMDGPU_FENCE_FLAG_INT)) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -407,15 +407,15 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
* @adev: amdgpu_device pointer
- *
+ * @inst_mask: mask of dma engine instances to be disabled
* Stop the gfx async dma ring buffers.
*/
-static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask)
{
u32 rb_cntl, ib_cntl;
int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
+ for_each_inst(i, inst_mask) {
rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
@@ -506,9 +506,11 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
+ uint32_t inst_mask;
+ inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
if (!enable) {
- sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_gfx_stop(adev, inst_mask);
sdma_v5_2_rlc_stop(adev);
}
@@ -522,14 +524,17 @@ static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ * sdma_v5_2_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+
+static int sdma_v5_2_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -539,139 +544,161 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
u32 temp;
u32 wptr_poll_cntl;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ ring = &adev->sdma.instance[i].ring;
- if (!amdgpu_sriov_vf(adev))
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ }
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
- mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
- SDMA0_GFX_RB_WPTR_POLL_CNTL,
- F32_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
- wptr_poll_cntl);
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
-
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
-
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
- if (amdgpu_sriov_vf(adev))
- sdma_v5_2_ring_set_wptr(ring);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_2_ring_set_wptr(ring);
- /* set minor_ptr_update to 0 after wptr programed */
+ /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- /* SRIOV VF has no control of any of registers below */
- if (!amdgpu_sriov_vf(adev)) {
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-
- /* enable MCBP */
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
- SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
-
- /* unhalt engine */
- temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ /* SRIOV VF has no control of any of registers below */
+ if (!amdgpu_sriov_vf(adev)) {
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v5_2_ctx_switch_enable(adev, true);
- sdma_v5_2_enable(adev, true);
- }
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
+
+ return amdgpu_ring_test_helper(ring);
+}
- r = amdgpu_ring_test_helper(ring);
+/**
+ * sdma_v5_2_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v5_2_gfx_resume_instance(adev, i, false);
if (r)
return r;
}
@@ -736,37 +763,49 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v5_2_soft_reset(void *handle)
+static int sdma_v5_2_soft_reset_engine(struct amdgpu_device *adev, u32 instance_id)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset;
u32 tmp;
- int i;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- grbm_soft_reset = REG_SET_FIELD(0,
- GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
- 1);
- grbm_soft_reset <<= i;
+ grbm_soft_reset = REG_SET_FIELD(0,
+ GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+ 1);
+ grbm_soft_reset <<= instance_id;
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
- udelay(50);
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ return 0;
+}
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+static int sdma_v5_2_soft_reset(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_v5_2_soft_reset_engine(adev, i);
udelay(50);
}
return 0;
}
+static const struct amdgpu_sdma_funcs sdma_v5_2_sdma_funcs = {
+ .stop_kernel_queue = &sdma_v5_2_stop_queue,
+ .start_kernel_queue = &sdma_v5_2_restore_queue,
+ .soft_reset_kernel_queue = &sdma_v5_2_soft_reset_engine,
+};
+
/**
* sdma_v5_2_start - setup and start the async dma engines
*
@@ -778,6 +817,7 @@ static int sdma_v5_2_soft_reset(void *handle)
static int sdma_v5_2_start(struct amdgpu_device *adev)
{
int r = 0;
+ struct amdgpu_ip_block *ip_block;
if (amdgpu_sriov_vf(adev)) {
sdma_v5_2_ctx_switch_enable(adev, false);
@@ -798,7 +838,11 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
msleep(1000);
}
- sdma_v5_2_soft_reset(adev);
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
+ if (!ip_block)
+ return -EINVAL;
+
+ sdma_v5_2_soft_reset(ip_block);
/* unhalt the MEs */
sdma_v5_2_enable(adev, true);
/* enable sdma ring preemption */
@@ -873,33 +917,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -912,10 +945,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -927,8 +957,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -951,37 +980,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -1009,10 +1024,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1020,11 +1032,10 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1180,7 +1191,28 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v5_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
+ uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
+
+ /* Update the PD address for this VMID. */
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
+ (hub->ctx_addr_distance * vmid),
+ lower_32_bits(pd_addr));
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
+ (hub->ctx_addr_distance * vmid),
+ upper_32_bits(pd_addr));
+
+ /* Trigger invalidation. */
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATION) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_GFX_ENG_ID(ring->vm_inv_eng) |
+ SDMA_PKT_VM_INVALIDATION_HEADER_MM_ENG_ID(0x1f));
+ amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, 0xFFFFFFFF);
+ amdgpu_ring_write(ring,
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(1 << vmid) |
+ SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(0x1F));
}
static void sdma_v5_2_ring_emit_wreg(struct amdgpu_ring *ring,
@@ -1216,9 +1248,9 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v5_2_early_init(void *handle)
+static int sdma_v5_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_sdma_init_microcode(adev, 0, true);
@@ -1268,11 +1300,11 @@ static unsigned sdma_v5_2_seq_to_trap_id(int seq_num)
return -EINVAL;
}
-static int sdma_v5_2_sw_init(void *handle)
+static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
uint32_t *ptr;
@@ -1286,6 +1318,8 @@ static int sdma_v5_2_sw_init(void *handle)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
+ mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
+ adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -1306,6 +1340,24 @@ static int sdma_v5_2_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(5, 2, 0):
+ case IP_VERSION(5, 2, 2):
+ case IP_VERSION(5, 2, 3):
+ case IP_VERSION(5, 2, 4):
+ if (adev->sdma.instance[0].fw_version >= 76)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ case IP_VERSION(5, 2, 5):
+ if (adev->sdma.instance[0].fw_version >= 34)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr)
@@ -1313,17 +1365,22 @@ static int sdma_v5_2_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v5_2_sw_fini(void *handle)
+static int sdma_v5_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
kfree(adev->sdma.ip_dump);
@@ -1331,16 +1388,16 @@ static int sdma_v5_2_sw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_hw_init(void *handle)
+static int sdma_v5_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return sdma_v5_2_start(adev);
}
-static int sdma_v5_2_hw_fini(void *handle)
+static int sdma_v5_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1351,23 +1408,19 @@ static int sdma_v5_2_hw_fini(void *handle)
return 0;
}
-static int sdma_v5_2_suspend(void *handle)
+static int sdma_v5_2_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_fini(adev);
+ return sdma_v5_2_hw_fini(ip_block);
}
-static int sdma_v5_2_resume(void *handle)
+static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v5_2_hw_init(adev);
+ return sdma_v5_2_hw_init(ip_block);
}
-static bool sdma_v5_2_is_idle(void *handle)
+static bool sdma_v5_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1380,11 +1433,11 @@ static bool sdma_v5_2_is_idle(void *handle)
return true;
}
-static int sdma_v5_2_wait_for_idle(void *handle)
+static int sdma_v5_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1, sdma2, sdma3;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
@@ -1399,6 +1452,84 @@ static int sdma_v5_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
+static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+
+ return amdgpu_sdma_reset_engine(adev, inst_id);
+}
+
+static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring)
+{
+ u32 f32_cntl, freeze, cntl, stat1_reg;
+ struct amdgpu_device *adev = ring->adev;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ i = ring->me;
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+
+ /* stop queue */
+ sdma_v5_2_gfx_stop(adev, 1 << i);
+
+ /*engine stop SDMA1_F32_CNTL.HALT to 1 and SDMAx_FREEZE freeze bit to 1 */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE), freeze);
+
+ for (j = 0; j < adev->usec_timeout; j++) {
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_FREEZE));
+
+ if (REG_GET_FIELD(freeze, SDMA0_FREEZE, FROZEN) & 1)
+ break;
+ udelay(1);
+ }
+
+
+ if (j == adev->usec_timeout) {
+ stat1_reg = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_STATUS1_REG));
+ if ((stat1_reg & 0x3FF) != 0x3FF) {
+ DRM_ERROR("cannot soft reset as sdma not idle\n");
+ r = -ETIMEDOUT;
+ goto err0;
+ }
+ }
+
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+
+ cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, UTC_L1_ENABLE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), cntl);
+
+err0:
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
+static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 inst_id = ring->me;
+ u32 freeze;
+ int r;
+
+ amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
+ /* unfreeze and unhalt */
+ freeze = RREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE));
+ freeze = REG_SET_FIELD(freeze, SDMA0_FREEZE, FREEZE, 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, inst_id, mmSDMA0_FREEZE), freeze);
+
+ r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
+ return r;
+}
+
static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
{
int i, r = 0;
@@ -1652,10 +1783,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
}
-static int sdma_v5_2_set_clockgating_state(void *handle,
+static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1681,15 +1812,15 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
return 0;
}
-static int sdma_v5_2_set_powergating_state(void *handle,
+static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v5_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1736,9 +1867,9 @@ static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring)
amdgpu_gfx_off_ctrl(adev, true);
}
-static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v5_2_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
uint32_t instance_offset;
@@ -1757,9 +1888,9 @@ static void sdma_v5_2_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v5_2_dump_ip_state(void *handle)
+static void sdma_v5_2_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_5_2);
@@ -1781,7 +1912,6 @@ static void sdma_v5_2_dump_ip_state(void *handle)
static const struct amd_ip_funcs sdma_v5_2_ip_funcs = {
.name = "sdma_v5_2",
.early_init = sdma_v5_2_early_init,
- .late_init = NULL,
.sw_init = sdma_v5_2_sw_init,
.sw_fini = sdma_v5_2_sw_fini,
.hw_init = sdma_v5_2_hw_init,
@@ -1834,6 +1964,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
.preempt_ib = sdma_v5_2_ring_preempt_ib,
+ .reset = sdma_v5_2_reset_queue,
};
static void sdma_v5_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 208a1fa9d4e7..a9bdf8d61d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -43,6 +43,8 @@
#include "sdma_common.h"
#include "sdma_v6_0.h"
#include "v11_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
@@ -51,6 +53,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin");
MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin");
+MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin");
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_HYP_DEC_REG_START 0x5880
@@ -375,11 +378,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -469,14 +470,16 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ * sdma_v6_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v6_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
@@ -485,132 +488,152 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
u32 doorbell_offset;
u32 temp;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
-
- if (!amdgpu_sriov_vf(adev))
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ ring = &adev->sdma.instance[i].ring;
+ if (!amdgpu_sriov_vf(adev))
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ if (!restore)
+ ring->wptr = 0;
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, F32_WPTR_POLL_ENABLE, 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
- ring->wptr = 0;
+ doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
- doorbell = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
+ SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
-
- if (i == 0)
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
-
- if (amdgpu_sriov_vf(adev))
- sdma_v6_0_ring_set_wptr(ring);
-
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
-
- /* Set up sdma hang watchdog */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
- /* 100ms per unit */
- temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
- max(adev->usec_timeout/100000, 1));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
-
- /* Set up RESP_MODE to non-copy addresses */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
-
- /* program default cache read and write policy */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- temp &= 0xFF0FFF;
- temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
- SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
- }
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, TH1_RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), temp);
+ }
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v6_0_enable(adev, true);
+
+ return amdgpu_ring_test_helper(ring);
+}
- if (amdgpu_sriov_vf(adev))
- sdma_v6_0_enable(adev, true);
+/**
+ * sdma_v6_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
- r = amdgpu_ring_test_helper(ring);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v6_0_gfx_resume_instance(adev, i, false);
if (r)
return r;
}
@@ -733,9 +756,9 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v6_0_soft_reset(void *handle)
+static int sdma_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
int i;
@@ -769,9 +792,9 @@ static int sdma_v6_0_soft_reset(void *handle)
return sdma_v6_0_start(adev);
}
-static bool sdma_v6_0_check_soft_reset(void *handle)
+static bool sdma_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
long tmo = msecs_to_jiffies(1000);
@@ -868,6 +891,12 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ m->sdmax_rlcx_f32_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_f32_dbg1 = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -894,33 +923,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -933,10 +951,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -948,8 +963,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -972,37 +986,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
@@ -1030,10 +1030,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1041,11 +1038,10 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1272,11 +1268,28 @@ static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev)
}
}
-static int sdma_v6_0_early_init(void *handle)
+static int sdma_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ break;
+ case 1:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ break;
+ case 2:
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ break;
+ }
+
r = amdgpu_sdma_init_microcode(adev, 0, true);
if (r)
return r;
@@ -1291,11 +1304,11 @@ static int sdma_v6_0_early_init(void *handle)
return 0;
}
-static int sdma_v6_0_sw_init(void *handle)
+static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
uint32_t *ptr;
@@ -1306,11 +1319,19 @@ static int sdma_v6_0_sw_init(void *handle)
if (r)
return r;
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_11_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
@@ -1328,6 +1349,19 @@ static int sdma_v6_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 2):
+ case IP_VERSION(6, 0, 3):
+ if (adev->sdma.instance[0].fw_version >= 21)
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+ break;
+ default:
+ break;
+ }
+
if (amdgpu_sdma_ras_sw_init(adev)) {
dev_err(adev->dev, "Failed to initialize sdma ras block!\n");
return -EINVAL;
@@ -1340,17 +1374,39 @@ static int sdma_v6_0_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(6, 0, 0):
+ if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 2):
+ if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ case IP_VERSION(6, 0, 3):
+ if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return r;
}
-static int sdma_v6_0_sw_fini(void *handle)
+static int sdma_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
kfree(adev->sdma.ip_dump);
@@ -1358,43 +1414,68 @@ static int sdma_v6_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v6_0_hw_init(void *handle)
+static int sdma_v6_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int i, r;
- return sdma_v6_0_start(adev);
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v6_0_start(adev);
+ if (r)
+ return r;
+
+ return sdma_v6_0_set_userq_trap_interrupts(adev, true);
}
-static int sdma_v6_0_hw_fini(void *handle)
+static int sdma_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
sdma_v6_0_ctxempty_int_enable(adev, false);
sdma_v6_0_enable(adev, false);
+ sdma_v6_0_set_userq_trap_interrupts(adev, false);
return 0;
}
-static int sdma_v6_0_suspend(void *handle)
+static int sdma_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v6_0_hw_fini(adev);
+ return sdma_v6_0_hw_fini(ip_block);
}
-static int sdma_v6_0_resume(void *handle)
+static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v6_0_hw_init(adev);
+ return sdma_v6_0_hw_init(ip_block);
}
-static bool sdma_v6_0_is_idle(void *handle)
+static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1407,11 +1488,11 @@ static bool sdma_v6_0_is_idle(void *handle)
return true;
}
-static int sdma_v6_0_wait_for_idle(void *handle)
+static int sdma_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v6_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
@@ -1469,6 +1550,31 @@ static int sdma_v6_0_ring_preempt_ib(struct amdgpu_ring *ring)
return r;
}
+static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring)
+ break;
+ }
+
+ if (i == adev->sdma.num_instances) {
+ DRM_ERROR("sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ return sdma_v6_0_gfx_resume_instance(adev, i, true);
+}
+
static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1493,25 +1599,9 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instances, queue;
- uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
- return 0;
- }
-
queue = entry->ring_id & 0xf;
instances = (entry->ring_id & 0xf0) >> 4;
if (instances > 1) {
@@ -1533,6 +1623,29 @@ static int sdma_v6_0_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v6_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1540,25 +1653,25 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v6_0_set_clockgating_state(void *handle,
+static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int sdma_v6_0_set_powergating_state(void *handle,
+static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
}
-static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v6_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
uint32_t instance_offset;
@@ -1577,9 +1690,9 @@ static void sdma_v6_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v6_0_dump_ip_state(void *handle)
+static void sdma_v6_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_6_0);
@@ -1601,7 +1714,6 @@ static void sdma_v6_0_dump_ip_state(void *handle)
const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
.name = "sdma_v6_0",
.early_init = sdma_v6_0_early_init,
- .late_init = NULL,
.sw_init = sdma_v6_0_sw_init,
.sw_fini = sdma_v6_0_sw_fini,
.hw_init = sdma_v6_0_hw_init,
@@ -1652,6 +1764,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v6_0_ring_init_cond_exec,
.preempt_ib = sdma_v6_0_ring_preempt_ib,
+ .reset = sdma_v6_0_reset_queue,
};
static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1669,6 +1782,10 @@ static const struct amdgpu_irq_src_funcs sdma_v6_0_trap_irq_funcs = {
.process = sdma_v6_0_process_trap_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v6_0_fence_irq_funcs = {
+ .process = sdma_v6_0_process_fence_irq,
+};
+
static const struct amdgpu_irq_src_funcs sdma_v6_0_illegal_inst_irq_funcs = {
.process = sdma_v6_0_process_illegal_inst_irq,
};
@@ -1678,6 +1795,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v6_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v6_0_fence_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v6_0_illegal_inst_irq_funcs;
}
@@ -1726,7 +1844,7 @@ static void sdma_v6_0_emit_fill_buffer(struct amdgpu_ib *ib,
uint64_t dst_offset,
uint32_t byte_count)
{
- ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
ib->ptr[ib->length_dw++] = src_data;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 9288f37a3cc5..86903eccbd4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -33,7 +33,7 @@
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "hdp/hdp_6_0_0_offset.h"
-#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_12_0_0.h"
#include "soc15_common.h"
#include "soc15.h"
@@ -42,6 +42,8 @@
#include "sdma_common.h"
#include "sdma_v7_0.h"
#include "v12_structs.h"
+#include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");
@@ -204,66 +206,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t *wptr_saved;
- uint32_t *is_queue_unmap;
- uint64_t aggregated_db_index;
- uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
- if (ring->is_mes_queue) {
- wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
- is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
- sizeof(uint32_t));
- aggregated_db_index =
- amdgpu_mes_get_aggregated_doorbell_index(adev,
- ring->hw_prio);
-
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- *wptr_saved = ring->wptr << 2;
- if (*is_queue_unmap) {
- WDOORBELL64(aggregated_db_index, ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- }
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
- ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
- } else {
- DRM_DEBUG("Not using doorbell -- "
- "regSDMA%i_GFX_RB_WPTR == 0x%08x "
- "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
- ring->me,
- regSDMA0_QUEUE0_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
- ring->me,
- regSDMA0_QUEUE0_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
- }
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev,
+ ring->me,
+ regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
}
}
@@ -407,11 +382,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
}
if (flags & AMDGPU_FENCE_FLAG_INT) {
- uint32_t ctx = ring->is_mes_queue ?
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
/* generate an interrupt */
amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
- amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
}
@@ -490,162 +463,185 @@ static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable)
}
/**
- * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine
*
* @adev: amdgpu_device pointer
+ * @i: instance
+ * @restore: used to restore wptr when restart
*
- * Set up the gfx DMA ring buffers and enable them.
- * Returns 0 for success, error for failure.
+ * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
+ * Return 0 for success.
*/
-static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl;
u32 rb_bufsz;
u32 doorbell;
u32 doorbell_offset;
- u32 tmp;
+ u32 temp;
u64 wptr_gpu_addr;
- int i, r;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
+ int r;
- //if (!amdgpu_sriov_vf(adev))
- // WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ ring = &adev->sdma.instance[i].ring;
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
-
- /* Initialize the ring buffer's read and write pointers */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ if (restore) {
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ } else {
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0);
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0);
+ }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = ring->wptr_gpu_addr;
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
+ upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
+ lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
+ else
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- /* setup the wptr shadow polling */
- wptr_gpu_addr = ring->wptr_gpu_addr;
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
-
- /* set the wb address whether it's enabled or not */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI),
- upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO),
- lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
-
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
- if (amdgpu_sriov_vf(adev))
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
- else
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
+ if (!restore)
ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
- doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
- doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
+ doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL));
+ doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
- }
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
-
- if (i == 0)
- adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
-
- if (amdgpu_sriov_vf(adev))
- sdma_v7_0_ring_set_wptr(ring);
-
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
-
- /* Set up sdma hang watchdog */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
- /* 100ms per unit */
- tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
- max(adev->usec_timeout/100000, 1));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp);
-
- /* Set up RESP_MODE to non-copy addresses */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
- tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
- tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp);
-
- /* program default cache read and write policy */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
- /* clean read policy and write policy bits */
- tmp &= 0xFF0FFF;
- tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
- (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
- tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0);
- tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp);
- }
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0);
+ }
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+ if (i == 0)
+ adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
- ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v7_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
+ (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0);
+ temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1);
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
- /* enable DMA IBs */
- WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ /* enable DMA IBs */
+ WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl);
+ ring->sched.ready = true;
- ring->sched.ready = true;
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v7_0_ctx_switch_enable(adev, true);
+ sdma_v7_0_enable(adev, true);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v7_0_ctx_switch_enable(adev, true);
- sdma_v7_0_enable(adev, true);
- }
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ ring->sched.ready = false;
- r = amdgpu_ring_test_helper(ring);
- if (r) {
- ring->sched.ready = false;
- return r;
- }
+ return r;
+}
+
+/**
+ * sdma_v7_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them.
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev)
+{
+ int i, r;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = sdma_v7_0_gfx_resume_instance(adev, i, false);
+ if (r)
+ return r;
}
return 0;
+
}
/**
@@ -753,9 +749,9 @@ static int sdma_v7_0_load_microcode(struct amdgpu_device *adev)
return 0;
}
-static int sdma_v7_0_soft_reset(void *handle)
+static int sdma_v7_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp;
int i;
@@ -789,9 +785,9 @@ static int sdma_v7_0_soft_reset(void *handle)
return sdma_v7_0_start(adev);
}
-static bool sdma_v7_0_check_soft_reset(void *handle)
+static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
long tmo = msecs_to_jiffies(1000);
@@ -806,6 +802,31 @@ static bool sdma_v7_0_check_soft_reset(void *handle)
return false;
}
+static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i, r;
+
+ if (amdgpu_sriov_vf(adev))
+ return -EINVAL;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (ring == &adev->sdma.instance[i].ring)
+ break;
+ }
+
+ if (i == adev->sdma.num_instances) {
+ DRM_ERROR("sdma instance not found\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true);
+ if (r)
+ return r;
+
+ return sdma_v7_0_gfx_resume_instance(adev, i, true);
+}
+
/**
* sdma_v7_0_start - setup and start the async dma engines
*
@@ -887,6 +908,12 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
+ m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+ m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
+ m->sdmax_rlcx_mcu_dbg0 = lower_32_bits(prop->fence_address);
+ m->sdmax_rlcx_mcu_dbg1 = upper_32_bits(prop->fence_address);
+
return 0;
}
@@ -913,33 +940,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
u32 tmp;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
- return r;
- }
-
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
}
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -952,10 +968,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -967,8 +980,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring)
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -991,37 +1003,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
long r;
u32 tmp = 0;
u64 gpu_addr;
- volatile uint32_t *cpu_ptr = NULL;
tmp = 0xCAFEDEAD;
memset(&ib, 0, sizeof(ib));
- if (ring->is_mes_queue) {
- uint32_t offset = 0;
- offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
- ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
-
- offset = amdgpu_mes_ctx_get_offs(ring,
- AMDGPU_MES_CTX_PADDING_OFFS);
- gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
- cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
- *cpu_ptr = tmp;
- } else {
- r = amdgpu_device_wb_get(adev, &index);
- if (r) {
- dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
- return r;
- }
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
- gpu_addr = adev->wb.gpu_addr + (index * 4);
- adev->wb.wb[index] = cpu_to_le32(tmp);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err0;
- }
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
}
ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
@@ -1049,10 +1047,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto err1;
}
- if (ring->is_mes_queue)
- tmp = le32_to_cpu(*cpu_ptr);
- else
- tmp = le32_to_cpu(adev->wb.wb[index]);
+ tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
r = 0;
@@ -1060,11 +1055,10 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
- if (!ring->is_mes_queue)
- amdgpu_device_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1259,11 +1253,28 @@ static void sdma_v7_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
-static int sdma_v7_0_early_init(void *handle)
+static int sdma_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
+ switch (amdgpu_user_queue) {
+ case -1:
+ case 0:
+ default:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = true;
+ break;
+ case 1:
+ adev->sdma.no_user_submission = false;
+ adev->sdma.disable_uq = false;
+ break;
+ case 2:
+ adev->sdma.no_user_submission = true;
+ adev->sdma.disable_uq = false;
+ break;
+ }
+
r = amdgpu_sdma_init_microcode(adev, 0, true);
if (r) {
DRM_ERROR("Failed to init sdma firmware!\n");
@@ -1279,26 +1290,34 @@ static int sdma_v7_0_early_init(void *handle)
return 0;
}
-static int sdma_v7_0_sw_init(void *handle)
+static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
uint32_t *ptr;
/* SDMA trap event */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
- GFX_11_0_0__SRCID__SDMA_TRAP,
+ GFX_12_0_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
+ /* SDMA user fence event */
+ r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
+ GFX_12_0_0__SRCID__SDMA_FENCE,
+ &adev->sdma.fence_irq);
+ if (r)
+ return r;
+
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->me = i;
+ ring->no_user_submission = adev->sdma.no_user_submission;
DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
@@ -1316,6 +1335,13 @@ static int sdma_v7_0_sw_init(void *handle)
return r;
}
+ adev->sdma.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
+ adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_sdma_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
/* Allocate memory for SDMA IP Dump buffer */
ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr)
@@ -1323,17 +1349,28 @@ static int sdma_v7_0_sw_init(void *handle)
else
DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
+ switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
+ case IP_VERSION(7, 0, 0):
+ case IP_VERSION(7, 0, 1):
+ if ((adev->sdma.instance[0].fw_version >= 7836028) && !adev->sdma.disable_uq)
+ adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+ break;
+ default:
+ break;
+ }
+
return r;
}
-static int sdma_v7_0_sw_fini(void *handle)
+static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ amdgpu_sdma_sysfs_reset_mask_fini(adev);
amdgpu_sdma_destroy_inst_ctx(adev, true);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
@@ -1344,43 +1381,68 @@ static int sdma_v7_0_sw_fini(void *handle)
return 0;
}
-static int sdma_v7_0_hw_init(void *handle)
+static int sdma_v7_0_set_userq_trap_interrupts(struct amdgpu_device *adev,
+ bool enable)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned int irq_type;
+ int i, r;
- return sdma_v7_0_start(adev);
+ if (adev->userq_funcs[AMDGPU_HW_IP_DMA]) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ irq_type = AMDGPU_SDMA_IRQ_INSTANCE0 + i;
+ if (enable)
+ r = amdgpu_irq_get(adev, &adev->sdma.trap_irq,
+ irq_type);
+ else
+ r = amdgpu_irq_put(adev, &adev->sdma.trap_irq,
+ irq_type);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int sdma_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r;
+
+ r = sdma_v7_0_start(adev);
+ if (r)
+ return r;
+
+ return sdma_v7_0_set_userq_trap_interrupts(adev, true);
}
-static int sdma_v7_0_hw_fini(void *handle)
+static int sdma_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
sdma_v7_0_ctx_switch_enable(adev, false);
sdma_v7_0_enable(adev, false);
+ sdma_v7_0_set_userq_trap_interrupts(adev, false);
return 0;
}
-static int sdma_v7_0_suspend(void *handle)
+static int sdma_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v7_0_hw_fini(adev);
+ return sdma_v7_0_hw_fini(ip_block);
}
-static int sdma_v7_0_resume(void *handle)
+static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return sdma_v7_0_hw_init(adev);
+ return sdma_v7_0_hw_init(ip_block);
}
-static bool sdma_v7_0_is_idle(void *handle)
+static bool sdma_v7_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 i;
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -1393,11 +1455,11 @@ static bool sdma_v7_0_is_idle(void *handle)
return true;
}
-static int sdma_v7_0_wait_for_idle(void *handle)
+static int sdma_v7_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 sdma0, sdma1;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
sdma0 = RREG32(sdma_v7_0_get_reg_offset(adev, 0, regSDMA0_STATUS_REG));
@@ -1481,25 +1543,9 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
{
int instances, queue;
- uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: SDMA trap\n");
- if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
- struct amdgpu_mes_queue *queue;
-
- mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
- spin_lock(&adev->mes.queue_id_lock);
- queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
- if (queue) {
- DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
- amdgpu_fence_process(queue->ring);
- }
- spin_unlock(&adev->mes.queue_id_lock);
- return 0;
- }
-
queue = entry->ring_id & 0xf;
instances = (entry->ring_id & 0xf0) >> 4;
if (instances > 1) {
@@ -1521,6 +1567,29 @@ static int sdma_v7_0_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v7_0_process_fence_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u32 doorbell_offset = entry->src_data[0];
+
+ if (adev->enable_mes && doorbell_offset) {
+ struct amdgpu_userq_fence_driver *fence_drv = NULL;
+ struct xarray *xa = &adev->userq_xa;
+ unsigned long flags;
+
+ doorbell_offset >>= SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ xa_lock_irqsave(xa, flags);
+ fence_drv = xa_load(xa, doorbell_offset);
+ if (fence_drv)
+ amdgpu_userq_fence_driver_process(fence_drv);
+ xa_unlock_irqrestore(xa, flags);
+ }
+
+ return 0;
+}
+
static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -1528,25 +1597,25 @@ static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int sdma_v7_0_set_clockgating_state(void *handle,
+static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int sdma_v7_0_set_powergating_state(void *handle,
+static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags)
+static void sdma_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
}
-static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p)
+static void sdma_v7_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
uint32_t instance_offset;
@@ -1565,9 +1634,9 @@ static void sdma_v7_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void sdma_v7_0_dump_ip_state(void *handle)
+static void sdma_v7_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t instance_offset;
uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_0);
@@ -1640,6 +1709,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
.emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v7_0_ring_init_cond_exec,
.preempt_ib = sdma_v7_0_ring_preempt_ib,
+ .reset = sdma_v7_0_reset_queue,
};
static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1657,6 +1727,10 @@ static const struct amdgpu_irq_src_funcs sdma_v7_0_trap_irq_funcs = {
.process = sdma_v7_0_process_trap_irq,
};
+static const struct amdgpu_irq_src_funcs sdma_v7_0_fence_irq_funcs = {
+ .process = sdma_v7_0_process_fence_irq,
+};
+
static const struct amdgpu_irq_src_funcs sdma_v7_0_illegal_inst_irq_funcs = {
.process = sdma_v7_0_process_illegal_inst_irq,
};
@@ -1666,6 +1740,7 @@ static void sdma_v7_0_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
adev->sdma.num_instances;
adev->sdma.trap_irq.funcs = &sdma_v7_0_trap_irq_funcs;
+ adev->sdma.fence_irq.funcs = &sdma_v7_0_fence_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v7_0_illegal_inst_irq_funcs;
}
@@ -1688,11 +1763,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint32_t byte_count,
uint32_t copy_flags)
{
- uint32_t num_type, data_format, max_com;
+ uint32_t num_type, data_format, max_com, write_cm;
max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+ write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1;
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
@@ -1709,7 +1785,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)))
ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) |
((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
- ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) |
+ ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) |
SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
else
ib->ptr[ib->length_dw++] = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 85235470e872..e0f139de7991 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -35,6 +35,7 @@
#include "amdgpu_vce.h"
#include "atom.h"
#include "amd_pcie.h"
+
#include "si_dpm.h"
#include "sid.h"
#include "si_ih.h"
@@ -44,17 +45,30 @@
#include "dce_v6_0.h"
#include "si.h"
#include "uvd_v3_1.h"
-#include "amdgpu_vkms.h"
+
+#include "uvd/uvd_4_0_d.h"
+
+#include "smu/smu_6_0_d.h"
+#include "smu/smu_6_0_sh_mask.h"
+
#include "gca/gfx_6_0_d.h"
+#include "gca/gfx_6_0_sh_mask.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
+
#include "gmc/gmc_6_0_d.h"
+#include"gmc/gmc_6_0_sh_mask.h"
+
#include "dce/dce_6_0_d.h"
-#include "uvd/uvd_4_0_d.h"
+#include "dce/dce_6_0_sh_mask.h"
+
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
+#include "si_enums.h"
#include "amdgpu_dm.h"
+#include "amdgpu_vkms.h"
static const u32 tahiti_golden_registers[] =
{
@@ -909,7 +923,7 @@ static const u32 hainan_mgcg_cgcg_init[] =
/* XXX: update when we support VCE */
#if 0
-/* tahiti, pitcarin, verde */
+/* tahiti, pitcairn, verde */
static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] =
{
{
@@ -940,7 +954,7 @@ static const struct amdgpu_video_codecs hainan_video_codecs_encode =
.codec_array = NULL,
};
-/* tahiti, pitcarin, verde, oland */
+/* tahiti, pitcairn, verde, oland */
static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] =
{
{
@@ -1071,8 +1085,8 @@ static u32 si_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- r = RREG32(SMC_IND_DATA_0);
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ r = RREG32(mmSMC_IND_DATA_0);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return r;
}
@@ -1082,8 +1096,8 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
unsigned long flags;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(SMC_IND_INDEX_0, (reg));
- WREG32(SMC_IND_DATA_0, (v));
+ WREG32(mmSMC_IND_INDEX_0, (reg));
+ WREG32(mmSMC_IND_DATA_0, (v));
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
}
@@ -1110,55 +1124,55 @@ static void si_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
}
static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
- {GRBM_STATUS},
+ {mmGRBM_STATUS},
{mmGRBM_STATUS2},
{mmGRBM_STATUS_SE0},
{mmGRBM_STATUS_SE1},
{mmSRBM_STATUS},
{mmSRBM_STATUS2},
- {DMA_STATUS_REG + DMA0_REGISTER_OFFSET},
- {DMA_STATUS_REG + DMA1_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA0_REGISTER_OFFSET},
+ {mmDMA_STATUS_REG + DMA1_REGISTER_OFFSET},
{mmCP_STAT},
{mmCP_STALLED_STAT1},
{mmCP_STALLED_STAT2},
{mmCP_STALLED_STAT3},
- {GB_ADDR_CONFIG},
- {MC_ARB_RAMCFG},
- {GB_TILE_MODE0},
- {GB_TILE_MODE1},
- {GB_TILE_MODE2},
- {GB_TILE_MODE3},
- {GB_TILE_MODE4},
- {GB_TILE_MODE5},
- {GB_TILE_MODE6},
- {GB_TILE_MODE7},
- {GB_TILE_MODE8},
- {GB_TILE_MODE9},
- {GB_TILE_MODE10},
- {GB_TILE_MODE11},
- {GB_TILE_MODE12},
- {GB_TILE_MODE13},
- {GB_TILE_MODE14},
- {GB_TILE_MODE15},
- {GB_TILE_MODE16},
- {GB_TILE_MODE17},
- {GB_TILE_MODE18},
- {GB_TILE_MODE19},
- {GB_TILE_MODE20},
- {GB_TILE_MODE21},
- {GB_TILE_MODE22},
- {GB_TILE_MODE23},
- {GB_TILE_MODE24},
- {GB_TILE_MODE25},
- {GB_TILE_MODE26},
- {GB_TILE_MODE27},
- {GB_TILE_MODE28},
- {GB_TILE_MODE29},
- {GB_TILE_MODE30},
- {GB_TILE_MODE31},
- {CC_RB_BACKEND_DISABLE, true},
- {GC_USER_RB_BACKEND_DISABLE, true},
- {PA_SC_RASTER_CONFIG, true},
+ {mmGB_ADDR_CONFIG},
+ {mmMC_ARB_RAMCFG},
+ {mmGB_TILE_MODE0},
+ {mmGB_TILE_MODE1},
+ {mmGB_TILE_MODE2},
+ {mmGB_TILE_MODE3},
+ {mmGB_TILE_MODE4},
+ {mmGB_TILE_MODE5},
+ {mmGB_TILE_MODE6},
+ {mmGB_TILE_MODE7},
+ {mmGB_TILE_MODE8},
+ {mmGB_TILE_MODE9},
+ {mmGB_TILE_MODE10},
+ {mmGB_TILE_MODE11},
+ {mmGB_TILE_MODE12},
+ {mmGB_TILE_MODE13},
+ {mmGB_TILE_MODE14},
+ {mmGB_TILE_MODE15},
+ {mmGB_TILE_MODE16},
+ {mmGB_TILE_MODE17},
+ {mmGB_TILE_MODE18},
+ {mmGB_TILE_MODE19},
+ {mmGB_TILE_MODE20},
+ {mmGB_TILE_MODE21},
+ {mmGB_TILE_MODE22},
+ {mmGB_TILE_MODE23},
+ {mmGB_TILE_MODE24},
+ {mmGB_TILE_MODE25},
+ {mmGB_TILE_MODE26},
+ {mmGB_TILE_MODE27},
+ {mmGB_TILE_MODE28},
+ {mmGB_TILE_MODE29},
+ {mmGB_TILE_MODE30},
+ {mmGB_TILE_MODE31},
+ {mmCC_RB_BACKEND_DISABLE, true},
+ {mmGC_USER_RB_BACKEND_DISABLE, true},
+ {mmPA_SC_RASTER_CONFIG, true},
};
static uint32_t si_get_register_value(struct amdgpu_device *adev,
@@ -1264,37 +1278,37 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev)
u32 rom_cntl;
bool r;
- bus_cntl = RREG32(R600_BUS_CNTL);
+ bus_cntl = RREG32(mmBUS_CNTL);
if (adev->mode_info.num_crtc) {
- d1vga_control = RREG32(AVIVO_D1VGA_CONTROL);
- d2vga_control = RREG32(AVIVO_D2VGA_CONTROL);
- vga_render_control = RREG32(VGA_RENDER_CONTROL);
+ d1vga_control = RREG32(mmD1VGA_CONTROL);
+ d2vga_control = RREG32(mmD2VGA_CONTROL);
+ vga_render_control = RREG32(mmVGA_RENDER_CONTROL);
}
rom_cntl = RREG32(R600_ROM_CNTL);
/* enable the rom */
- WREG32(R600_BUS_CNTL, (bus_cntl & ~R600_BIOS_ROM_DIS));
+ WREG32(mmBUS_CNTL, (bus_cntl & ~BUS_CNTL__BIOS_ROM_DIS_MASK));
if (adev->mode_info.num_crtc) {
/* Disable VGA mode */
- WREG32(AVIVO_D1VGA_CONTROL,
- (d1vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(AVIVO_D2VGA_CONTROL,
- (d2vga_control & ~(AVIVO_DVGA_CONTROL_MODE_ENABLE |
- AVIVO_DVGA_CONTROL_TIMING_SELECT)));
- WREG32(VGA_RENDER_CONTROL,
- (vga_render_control & C_000300_VGA_VSTATUS_CNTL));
+ WREG32(mmD1VGA_CONTROL,
+ (d1vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmD2VGA_CONTROL,
+ (d2vga_control & ~(D1VGA_CONTROL__D1VGA_MODE_ENABLE_MASK |
+ D1VGA_CONTROL__D1VGA_TIMING_SELECT_MASK)));
+ WREG32(mmVGA_RENDER_CONTROL,
+ (vga_render_control & ~VGA_RENDER_CONTROL__VGA_VSTATUS_CNTL_MASK));
}
WREG32(R600_ROM_CNTL, rom_cntl | R600_SCK_OVERWRITE);
r = amdgpu_read_bios(adev);
/* restore regs */
- WREG32(R600_BUS_CNTL, bus_cntl);
+ WREG32(mmBUS_CNTL, bus_cntl);
if (adev->mode_info.num_crtc) {
- WREG32(AVIVO_D1VGA_CONTROL, d1vga_control);
- WREG32(AVIVO_D2VGA_CONTROL, d2vga_control);
- WREG32(VGA_RENDER_CONTROL, vga_render_control);
+ WREG32(mmD1VGA_CONTROL, d1vga_control);
+ WREG32(mmD2VGA_CONTROL, d2vga_control);
+ WREG32(mmVGA_RENDER_CONTROL, vga_render_control);
}
WREG32(R600_ROM_CNTL, rom_cntl);
return r;
@@ -1331,23 +1345,24 @@ static void si_set_clk_bypass_mode(struct amdgpu_device *adev)
{
u32 tmp, i;
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_BYPASS_EN;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_BYPASS_EN_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp |= SPLL_CTLREQ_CHG;
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp |= CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
- if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
+ if (RREG32(mmCG_SPLL_STATUS) & CG_SPLL_STATUS__SPLL_CHG_STATUS_MASK)
break;
udelay(1);
}
- tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
- tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
- WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL_2);
+ tmp &= ~(CG_SPLL_FUNC_CNTL_2__SPLL_CTLREQ_CHG_MASK |
+ CG_SPLL_FUNC_CNTL_2__SCLK_MUX_UPDATE_MASK);
+ WREG32(mmCG_SPLL_FUNC_CNTL_2, tmp);
tmp = RREG32(MPLL_CNTL_MODE);
tmp &= ~MPLL_MCLK_SEL;
@@ -1358,21 +1373,21 @@ static void si_spll_powerdown(struct amdgpu_device *adev)
{
u32 tmp;
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp |= SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp |= SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_RESET;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_RESET_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(CG_SPLL_FUNC_CNTL);
- tmp |= SPLL_SLEEP;
- WREG32(CG_SPLL_FUNC_CNTL, tmp);
+ tmp = RREG32(mmCG_SPLL_FUNC_CNTL);
+ tmp |= CG_SPLL_FUNC_CNTL__SPLL_SLEEP_MASK;
+ WREG32(mmCG_SPLL_FUNC_CNTL, tmp);
- tmp = RREG32(SPLL_CNTL_MODE);
- tmp &= ~SPLL_SW_DIR_CONTROL;
- WREG32(SPLL_CNTL_MODE, tmp);
+ tmp = RREG32(mmSPLL_CNTL_MODE);
+ tmp &= ~SPLL_CNTL_MODE__SPLL_SW_DIR_CONTROL_MASK;
+ WREG32(mmSPLL_CNTL_MODE, tmp);
}
static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
@@ -1454,14 +1469,14 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state)
{
uint32_t temp;
- temp = RREG32(CONFIG_CNTL);
+ temp = RREG32(mmCONFIG_CNTL);
if (!state) {
temp &= ~(1<<0);
temp |= (1<<1);
} else {
temp &= ~(1<<1);
}
- WREG32(CONFIG_CNTL, temp);
+ WREG32(mmCONFIG_CNTL, temp);
}
static u32 si_get_xclk(struct amdgpu_device *adev)
@@ -1469,12 +1484,12 @@ static u32 si_get_xclk(struct amdgpu_device *adev)
u32 reference_clock = adev->clock.spll.reference_freq;
u32 tmp;
- tmp = RREG32(CG_CLKPIN_CNTL_2);
- if (tmp & MUX_TCLK_TO_XCLK)
+ tmp = RREG32(mmCG_CLKPIN_CNTL_2);
+ if (tmp & CG_CLKPIN_CNTL_2__MUX_TCLK_TO_XCLK_MASK)
return TCLK;
- tmp = RREG32(CG_CLKPIN_CNTL);
- if (tmp & XTALIN_DIVIDE)
+ tmp = RREG32(mmCG_CLKPIN_CNTL);
+ if (tmp & CG_CLKPIN_CNTL__XTALIN_DIVIDE_MASK)
return reference_clock / 4;
return reference_clock;
@@ -1519,9 +1534,9 @@ static int si_get_pcie_lanes(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return 0;
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
- switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+ switch ((link_width_cntl & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT) {
case LC_LINK_WIDTH_X1:
return 1;
case LC_LINK_WIDTH_X2:
@@ -1568,13 +1583,13 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
return;
}
- link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- link_width_cntl &= ~LC_LINK_WIDTH_MASK;
- link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
- link_width_cntl |= (LC_RECONFIG_NOW |
- LC_RECONFIG_ARC_MISSING_ESCAPE);
+ link_width_cntl = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ link_width_cntl &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK;
+ link_width_cntl |= mask << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT;
+ link_width_cntl |= (PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK |
+ PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_ARC_MISSING_ESCAPE_MASK);
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
}
static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
@@ -1888,7 +1903,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev)
WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) {
- DRM_ERROR("Timeout setting UVD clocks!\n");
+ DRM_ERROR("Timeout setting VCE clocks!\n");
return -ETIMEDOUT;
}
@@ -2018,13 +2033,13 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
{
- return (RREG32(CC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
+ return (RREG32(mmCC_DRM_ID_STRAPS) & CC_DRM_ID_STRAPS__ATI_REV_ID_MASK)
>> CC_DRM_ID_STRAPS__ATI_REV_ID__SHIFT;
}
-static int si_common_early_init(void *handle)
+static int si_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->smc_rreg = &si_smc_rreg;
adev->smc_wreg = &si_smc_wreg;
@@ -2148,17 +2163,6 @@ static int si_common_early_init(void *handle)
return 0;
}
-static int si_common_sw_init(void *handle)
-{
- return 0;
-}
-
-static int si_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-
static void si_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
@@ -2250,9 +2254,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
- LC_CURRENT_DATA_RATE_SHIFT;
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ current_data_rate = (speed_cntl & PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) >>
+ PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
@@ -2279,17 +2283,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
- tmp = RREG32_PCIE(PCIE_LC_STATUS1);
- max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
- current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+ tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH__SHIFT;
+ current_lw = (tmp & PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH_MASK) >> PCIE_LC_STATUS1__LC_OPERATING_LINK_WIDTH__SHIFT;
if (current_lw < max_lw) {
- tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- if (tmp & LC_RENEGOTIATION_SUPPORT) {
- tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
- tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
- tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ if (tmp & PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATION_SUPPORT_MASK) {
+ tmp &= ~(PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_DIS_MASK);
+ tmp |= (max_lw << PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH__SHIFT);
+ tmp |= PCIE_LC_LINK_WIDTH_CNTL__LC_UPCONFIGURE_SUPPORT_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RENEGOTIATE_EN_MASK | PCIE_LC_LINK_WIDTH_CNTL__LC_RECONFIG_NOW_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, tmp);
}
}
@@ -2312,13 +2316,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
PCI_EXP_LNKCTL2,
&gpu_cfg2);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp |= LC_REDO_EQ;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
mdelay(100);
@@ -2344,16 +2348,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
(PCI_EXP_LNKCTL2_ENTER_COMP |
PCI_EXP_LNKCTL2_TX_MARGIN));
- tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
- tmp &= ~LC_SET_QUIESCE;
- WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+ tmp = RREG32_PCIE_PORT(ixPCIE_LC_CNTL4);
+ tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL4, tmp);
}
}
}
- speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
- speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_FORCE_EN_SW_SPEED_CHANGE_MASK | PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_HW_SPEED_CHANGE_MASK;
+ speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
tmp16 = 0;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -2365,13 +2369,13 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2,
PCI_EXP_LNKCTL2_TLS, tmp16);
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
- WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
+ WREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL, speed_cntl);
for (i = 0; i < adev->usec_timeout; i++) {
- speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
- if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+ speed_cntl = RREG32_PCIE_PORT(ixPCIE_LC_SPEED_CNTL);
+ if ((speed_cntl & PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK) == 0)
break;
udelay(1);
}
@@ -2429,121 +2433,121 @@ static void si_program_aspm(struct amdgpu_device *adev)
if (!amdgpu_device_should_use_aspm(adev))
return;
- orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- data &= ~LC_XMIT_N_FTS_MASK;
- data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK;
+ data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_OVERRIDE_EN_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
- data |= LC_GO_TO_RECOVERY;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_GO_TO_RECOVERY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL3, data);
- orig = data = RREG32_PCIE(PCIE_P_CNTL);
- data |= P_IGNORE_EDB_ERR;
+ orig = data = RREG32_PCIE(ixPCIE_P_CNTL);
+ data |= PCIE_P_CNTL__P_IGNORE_EDB_ERR_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_P_CNTL, data);
+ WREG32_PCIE(ixPCIE_P_CNTL, data);
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
- data |= LC_PMI_TO_L1_DIS;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK);
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (!disable_l0s)
- data |= LC_L0S_INACTIVITY(7);
+ data |= (7 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT);
if (!disable_l1) {
- data |= LC_L1_INACTIVITY(7);
- data &= ~LC_PMI_TO_L1_DIS;
+ data |= (7 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT);
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
if (!disable_plloff_in_l1) {
bool clk_req_support;
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~(PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB0_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~(PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB0_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
- data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~(PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0_MASK | PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_OFF_0__SHIFT) | (7 << PB1_PIF_PWRDOWN_0__PLL_POWER_STATE_IN_TXS2_0__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
- data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~(PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1_MASK | PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1_MASK);
+ data |= (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_OFF_1__SHIFT) | (7 << PB1_PIF_PWRDOWN_1__PLL_POWER_STATE_IN_TXS2_1__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_0);
+ data &= ~PB0_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_0, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_1);
+ data &= ~PB0_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_1, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_2);
+ data &= ~PB0_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_2, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_PWRDOWN_3);
+ data &= ~PB0_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_PWRDOWN_3, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_PWRDOWN_3, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_0);
- data &= ~PLL_RAMP_UP_TIME_0_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_0);
+ data &= ~PB1_PIF_PWRDOWN_0__PLL_RAMP_UP_TIME_0_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_0, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_0, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_1);
- data &= ~PLL_RAMP_UP_TIME_1_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_1);
+ data &= ~PB1_PIF_PWRDOWN_1__PLL_RAMP_UP_TIME_1_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_1, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_2);
- data &= ~PLL_RAMP_UP_TIME_2_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_2);
+ data &= ~PB1_PIF_PWRDOWN_2__PLL_RAMP_UP_TIME_2_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_2, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_2, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_PWRDOWN_3);
- data &= ~PLL_RAMP_UP_TIME_3_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_PWRDOWN_3);
+ data &= ~PB1_PIF_PWRDOWN_3__PLL_RAMP_UP_TIME_3_MASK;
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_3, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_PWRDOWN_3, data);
}
- orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
- data &= ~LC_DYN_LANES_PWR_STATE_MASK;
- data |= LC_DYN_LANES_PWR_STATE(3);
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL);
+ data &= ~PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE_MASK;
+ data |= (3 << PCIE_LC_LINK_WIDTH_CNTL__LC_DYN_LANES_PWR_STATE__SHIFT);
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_LINK_WIDTH_CNTL, data);
- orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy0_rreg(adev,ixPB0_PIF_CNTL);
+ data &= ~PB0_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB0_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);
+ si_pif_phy0_wreg(adev,ixPB0_PIF_CNTL, data);
- orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);
- data &= ~LS2_EXIT_TIME_MASK;
+ orig = data = si_pif_phy1_rreg(adev,ixPB1_PIF_CNTL);
+ data &= ~PB1_PIF_CNTL__LS2_EXIT_TIME_MASK;
if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))
- data |= LS2_EXIT_TIME(5);
+ data |= (5 << PB1_PIF_CNTL__LS2_EXIT_TIME__SHIFT);
if (orig != data)
- si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data);
+ si_pif_phy1_wreg(adev,ixPB1_PIF_CNTL, data);
if (!disable_clkreq &&
!pci_is_root_bus(adev->pdev->bus)) {
@@ -2559,64 +2563,64 @@ static void si_program_aspm(struct amdgpu_device *adev)
}
if (clk_req_support) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
- data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL2);
+ data |= PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL2, data);
- orig = data = RREG32(THM_CLK_CNTL);
- data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
- data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+ orig = data = RREG32(mmTHM_CLK_CNTL);
+ data &= ~(THM_CLK_CNTL__CMON_CLK_SEL_MASK | THM_CLK_CNTL__TMON_CLK_SEL_MASK);
+ data |= (1 << THM_CLK_CNTL__CMON_CLK_SEL__SHIFT) | (1 << THM_CLK_CNTL__TMON_CLK_SEL__SHIFT);
if (orig != data)
- WREG32(THM_CLK_CNTL, data);
+ WREG32(mmTHM_CLK_CNTL, data);
- orig = data = RREG32(MISC_CLK_CNTL);
- data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
- data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+ orig = data = RREG32(mmMISC_CLK_CNTL);
+ data &= ~(MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL_MASK | MISC_CLK_CNTL__ZCLK_SEL_MASK);
+ data |= (1 << MISC_CLK_CNTL__DEEP_SLEEP_CLK_SEL__SHIFT) | (1 << MISC_CLK_CNTL__ZCLK_SEL__SHIFT);
if (orig != data)
- WREG32(MISC_CLK_CNTL, data);
+ WREG32(mmMISC_CLK_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL);
- data &= ~BCLK_AS_XCLK;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL);
+ data &= ~CG_CLKPIN_CNTL__BCLK_AS_XCLK_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL, data);
+ WREG32(mmCG_CLKPIN_CNTL, data);
- orig = data = RREG32(CG_CLKPIN_CNTL_2);
- data &= ~FORCE_BIF_REFCLK_EN;
+ orig = data = RREG32(mmCG_CLKPIN_CNTL_2);
+ data &= ~CG_CLKPIN_CNTL_2__FORCE_BIF_REFCLK_EN_MASK;
if (orig != data)
- WREG32(CG_CLKPIN_CNTL_2, data);
+ WREG32(mmCG_CLKPIN_CNTL_2, data);
- orig = data = RREG32(MPLL_BYPASSCLK_SEL);
- data &= ~MPLL_CLKOUT_SEL_MASK;
- data |= MPLL_CLKOUT_SEL(4);
+ orig = data = RREG32(mmMPLL_BYPASSCLK_SEL);
+ data &= ~MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL_MASK;
+ data |= 4 << MPLL_BYPASSCLK_SEL__MPLL_CLKOUT_SEL__SHIFT;
if (orig != data)
- WREG32(MPLL_BYPASSCLK_SEL, data);
+ WREG32(mmMPLL_BYPASSCLK_SEL, data);
- orig = data = RREG32(SPLL_CNTL_MODE);
- data &= ~SPLL_REFCLK_SEL_MASK;
+ orig = data = RREG32(mmSPLL_CNTL_MODE);
+ data &= ~SPLL_CNTL_MODE__SPLL_REFCLK_SEL_MASK;
if (orig != data)
- WREG32(SPLL_CNTL_MODE, data);
+ WREG32(mmSPLL_CNTL_MODE, data);
}
}
} else {
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
- orig = data = RREG32_PCIE(PCIE_CNTL2);
- data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+ orig = data = RREG32_PCIE(ixPCIE_CNTL2);
+ data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK;
if (orig != data)
- WREG32_PCIE(PCIE_CNTL2, data);
+ WREG32_PCIE(ixPCIE_CNTL2, data);
if (!disable_l0s) {
- data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
- if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
- data = RREG32_PCIE(PCIE_LC_STATUS1);
- if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
- orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
- data &= ~LC_L0S_INACTIVITY_MASK;
+ data = RREG32_PCIE_PORT(ixPCIE_LC_N_FTS_CNTL);
+ if((data & PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) == PCIE_LC_N_FTS_CNTL__LC_N_FTS_MASK) {
+ data = RREG32_PCIE(ixPCIE_LC_STATUS1);
+ if ((data & PCIE_LC_STATUS1__LC_REVERSE_XMIT_MASK) && (data & PCIE_LC_STATUS1__LC_REVERSE_RCVR_MASK)) {
+ orig = data = RREG32_PCIE_PORT(ixPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
if (orig != data)
- WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+ WREG32_PCIE_PORT(ixPCIE_LC_CNTL, data);
}
}
}
@@ -2633,9 +2637,9 @@ static void si_fix_pci_max_read_req_size(struct amdgpu_device *adev)
pcie_set_readrq(adev->pdev, 512);
}
-static int si_common_hw_init(void *handle)
+static int si_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_fix_pci_max_read_req_size(adev);
si_init_golden_registers(adev);
@@ -2645,47 +2649,28 @@ static int si_common_hw_init(void *handle)
return 0;
}
-static int si_common_hw_fini(void *handle)
+static int si_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
return 0;
}
-static int si_common_suspend(void *handle)
+static int si_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_fini(adev);
-}
-
-static int si_common_resume(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_common_hw_init(adev);
+ return si_common_hw_init(ip_block);
}
-static bool si_common_is_idle(void *handle)
+static bool si_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int si_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int si_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int si_common_set_clockgating_state(void *handle,
+static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_common_set_powergating_state(void *handle,
+static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -2694,20 +2679,12 @@ static int si_common_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_common_ip_funcs = {
.name = "si_common",
.early_init = si_common_early_init,
- .late_init = NULL,
- .sw_init = si_common_sw_init,
- .sw_fini = si_common_sw_fini,
.hw_init = si_common_hw_init,
.hw_fini = si_common_hw_fini,
- .suspend = si_common_suspend,
.resume = si_common_resume,
.is_idle = si_common_is_idle,
- .wait_for_idle = si_common_wait_for_idle,
- .soft_reset = si_common_soft_reset,
.set_clockgating_state = si_common_set_clockgating_state,
.set_powergating_state = si_common_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version si_common_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 11db5b755832..7f18e4875287 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -27,6 +27,8 @@
#include "si.h"
#include "sid.h"
+#include "oss/oss_1_0_d.h"
+#include "oss/oss_1_0_sh_mask.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
DMA0_REGISTER_OFFSET,
@@ -38,17 +40,31 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
+/**
+ * si_dma_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
return *ring->rptr_cpu_addr;
}
+/**
+ * si_dma_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (SI).
+ */
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
@@ -56,7 +72,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -117,9 +133,9 @@ static void si_dma_stop(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
/* dma0 */
- rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
- rb_cntl &= ~DMA_RB_ENABLE;
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ rb_cntl = RREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i]);
+ rb_cntl &= ~DMA_GFX_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
}
}
@@ -133,44 +149,44 @@ static int si_dma_start(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
- WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
- WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
+ WREG32(mmDMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
rb_bufsz = order_base_2(ring->ring_size / 4);
rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
- rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RB_SWAP_ENABLE_MASK | DMA_GFX_RB_CNTL__RPTR_WRITEBACK_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
/* Initialize the ring buffer's read and write pointers */
- WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
- WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], 0);
rptr_addr = ring->rptr_gpu_addr;
- WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
- WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
+ WREG32(mmDMA_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);
- rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+ rb_cntl |= DMA_GFX_RB_CNTL__RPTR_WRITEBACK_ENABLE_MASK;
- WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
+ WREG32(mmDMA_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
/* enable DMA IBs */
- ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+ ib_cntl = DMA_GFX_IB_CNTL__IB_ENABLE_MASK | DMA_GFX_IB_CNTL__CMD_VMID_FORCE_MASK;
#ifdef __BIG_ENDIAN
- ib_cntl |= DMA_IB_SWAP_ENABLE;
+ ib_cntl |= DMA_GFX_IB_CNTL__IB_SWAP_ENABLE_MASK;
#endif
- WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);
+ WREG32(mmDMA_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
- dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
- dma_cntl &= ~CTXEMPTY_INT_ENABLE;
- WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
+ dma_cntl = RREG32(mmDMA_CNTL + sdma_offsets[i]);
+ dma_cntl &= ~DMA_CNTL__CTXEMPTY_INT_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + sdma_offsets[i], dma_cntl);
ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
- WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
+ WREG32(mmDMA_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
+ WREG32(mmDMA_GFX_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_GFX_RB_CNTL__RB_ENABLE_MASK);
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -286,7 +302,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = -EINVAL;
err1:
- amdgpu_ib_free(adev, &ib, NULL);
+ amdgpu_ib_free(&ib, NULL);
dma_fence_put(f);
err0:
amdgpu_device_wb_free(adev, index);
@@ -457,11 +473,11 @@ static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-static int si_dma_early_init(void *handle)
+static int si_dma_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->sdma.num_instances = 2;
+ adev->sdma.num_instances = SDMA_MAX_INSTANCE;
si_dma_set_ring_funcs(adev);
si_dma_set_buffer_funcs(adev);
@@ -471,11 +487,11 @@ static int si_dma_early_init(void *handle)
return 0;
}
-static int si_dma_sw_init(void *handle)
+static int si_dma_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* DMA0 trap event */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
@@ -506,9 +522,9 @@ static int si_dma_sw_init(void *handle)
return r;
}
-static int si_dma_sw_fini(void *handle)
+static int si_dma_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
for (i = 0; i < adev->sdma.num_instances; i++)
@@ -517,61 +533,56 @@ static int si_dma_sw_fini(void *handle)
return 0;
}
-static int si_dma_hw_init(void *handle)
+static int si_dma_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_dma_start(adev);
}
-static int si_dma_hw_fini(void *handle)
+static int si_dma_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_dma_stop(adev);
+ si_dma_stop(ip_block->adev);
return 0;
}
-static int si_dma_suspend(void *handle)
+static int si_dma_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_fini(adev);
+ return si_dma_hw_fini(ip_block);
}
-static int si_dma_resume(void *handle)
+static int si_dma_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_dma_hw_init(adev);
+ return si_dma_hw_init(ip_block);
}
-static bool si_dma_is_idle(void *handle)
+static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS2);
+ struct amdgpu_device *adev = ip_block->adev;
+
+ u32 tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
+ if (tmp & (SRBM_STATUS2__DMA_BUSY_MASK | SRBM_STATUS2__DMA1_BUSY_MASK))
return false;
return true;
}
-static int si_dma_wait_for_idle(void *handle)
+static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_dma_is_idle(handle))
+ if (si_dma_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_dma_soft_reset(void *handle)
+static int si_dma_soft_reset(struct amdgpu_ip_block *ip_block)
{
DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
return 0;
@@ -588,14 +599,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE0:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -604,14 +615,14 @@ static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
case AMDGPU_SDMA_IRQ_INSTANCE1:
switch (state) {
case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl &= ~TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl &= ~DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
case AMDGPU_IRQ_STATE_ENABLE:
- sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
- sdma_cntl |= TRAP_ENABLE;
- WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
+ sdma_cntl = RREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET);
+ sdma_cntl |= DMA_CNTL__TRAP_ENABLE_MASK;
+ WREG32(mmDMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
break;
default:
break;
@@ -634,13 +645,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
return 0;
}
-static int si_dma_set_clockgating_state(void *handle,
+static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
u32 orig, data, offset;
int i;
bool enable;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
enable = (state == AMD_CG_STATE_GATE);
@@ -650,11 +661,11 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data &= ~MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data &= ~DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
- WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, 0x00000100);
}
} else {
for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -662,33 +673,33 @@ static int si_dma_set_clockgating_state(void *handle,
offset = DMA0_REGISTER_OFFSET;
else
offset = DMA1_REGISTER_OFFSET;
- orig = data = RREG32(DMA_POWER_CNTL + offset);
- data |= MEM_POWER_OVERRIDE;
+ orig = data = RREG32(mmDMA_POWER_CNTL + offset);
+ data |= DMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (data != orig)
- WREG32(DMA_POWER_CNTL + offset, data);
+ WREG32(mmDMA_POWER_CNTL + offset, data);
- orig = data = RREG32(DMA_CLK_CTRL + offset);
+ orig = data = RREG32(mmDMA_CLK_CTRL + offset);
data = 0xff000000;
if (data != orig)
- WREG32(DMA_CLK_CTRL + offset, data);
+ WREG32(mmDMA_CLK_CTRL + offset, data);
}
}
return 0;
}
-static int si_dma_set_powergating_state(void *handle,
+static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- WREG32(DMA_PGFSM_WRITE, 0x00002000);
- WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
+ WREG32(mmDMA_PGFSM_WRITE, 0x00002000);
+ WREG32(mmDMA_PGFSM_CONFIG, 0x100010ff);
for (tmp = 0; tmp < 5; tmp++)
- WREG32(DMA_PGFSM_WRITE, 0);
+ WREG32(mmDMA_PGFSM_WRITE, 0);
return 0;
}
@@ -696,7 +707,6 @@ static int si_dma_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_dma_ip_funcs = {
.name = "si_dma",
.early_init = si_dma_early_init,
- .late_init = NULL,
.sw_init = si_dma_sw_init,
.sw_fini = si_dma_sw_fini,
.hw_init = si_dma_hw_init,
@@ -708,8 +718,6 @@ static const struct amd_ip_funcs si_dma_ip_funcs = {
.soft_reset = si_dma_soft_reset,
.set_clockgating_state = si_dma_set_clockgating_state,
.set_powergating_state = si_dma_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index 4e935baa7b91..6da65778292b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -23,123 +23,15 @@
#ifndef SI_ENUMS_H
#define SI_ENUMS_H
-#define VBLANK_INT_MASK (1 << 0)
-#define DC_HPDx_INT_EN (1 << 16)
-#define VBLANK_ACK (1 << 4)
-#define VLINE_ACK (1 << 4)
-
-#define CURSOR_WIDTH 64
-#define CURSOR_HEIGHT 64
-
-#define VGA_VSTATUS_CNTL 0xFFFCFFFF
#define PRIORITY_MARK_MASK 0x7fff
#define PRIORITY_OFF (1 << 16)
#define PRIORITY_ALWAYS_ON (1 << 20)
-#define INTERLEAVE_EN (1 << 0)
-
-#define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-#define ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-
-#define GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-#define GRPH_ENDIAN_NONE 0
-#define GRPH_ENDIAN_8IN16 1
-#define GRPH_ENDIAN_8IN32 2
-#define GRPH_ENDIAN_8IN64 3
-#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-#define GRPH_RED_SEL_R 0
-#define GRPH_RED_SEL_G 1
-#define GRPH_RED_SEL_B 2
-#define GRPH_RED_SEL_A 3
-#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-#define GRPH_GREEN_SEL_G 0
-#define GRPH_GREEN_SEL_B 1
-#define GRPH_GREEN_SEL_A 2
-#define GRPH_GREEN_SEL_R 3
-#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-#define GRPH_BLUE_SEL_B 0
-#define GRPH_BLUE_SEL_A 1
-#define GRPH_BLUE_SEL_R 2
-#define GRPH_BLUE_SEL_G 3
-#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-#define GRPH_ALPHA_SEL_A 0
-#define GRPH_ALPHA_SEL_R 1
-#define GRPH_ALPHA_SEL_G 2
-#define GRPH_ALPHA_SEL_B 3
-
-#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define GRPH_DEPTH_8BPP 0
-#define GRPH_DEPTH_16BPP 1
-#define GRPH_DEPTH_32BPP 2
-
-#define GRPH_FORMAT(x) (((x) & 0x7) << 8)
-#define GRPH_FORMAT_INDEXED 0
-#define GRPH_FORMAT_ARGB1555 0
-#define GRPH_FORMAT_ARGB565 1
-#define GRPH_FORMAT_ARGB4444 2
-#define GRPH_FORMAT_AI88 3
-#define GRPH_FORMAT_MONO16 4
-#define GRPH_FORMAT_BGRA5551 5
-#define GRPH_FORMAT_ARGB8888 0
-#define GRPH_FORMAT_ARGB2101010 1
-#define GRPH_FORMAT_32BPP_DIG 2
-#define GRPH_FORMAT_8B_ARGB2101010 3
-#define GRPH_FORMAT_BGRA1010102 4
-#define GRPH_FORMAT_8B_BGRA1010102 5
-#define GRPH_FORMAT_RGB111110 6
-#define GRPH_FORMAT_BGR101111 7
-
-#define GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_ARRAY_LINEAR_GENERAL 0
-#define GRPH_ARRAY_LINEAR_ALIGNED 1
-#define GRPH_ARRAY_1D_TILED_THIN1 2
-#define GRPH_ARRAY_2D_TILED_THIN1 4
-#define GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define GRPH_PIPE_CONFIG(x) (((x) & 0x1f) << 24)
-
-#define CURSOR_EN (1 << 0)
-#define CURSOR_MODE(x) (((x) & 0x3) << 8)
-#define CURSOR_MONO 0
-#define CURSOR_24_1 1
-#define CURSOR_24_8_PRE_MULT 2
-#define CURSOR_24_8_UNPRE_MULT 3
-#define CURSOR_2X_MAGNIFY (1 << 16)
-#define CURSOR_FORCE_MC_ON (1 << 20)
-#define CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-#define CURSOR_URGENT_ALWAYS 0
-#define CURSOR_URGENT_1_8 1
-#define CURSOR_URGENT_1_4 2
-#define CURSOR_URGENT_3_8 3
-#define CURSOR_URGENT_1_2 4
-#define CURSOR_UPDATE_PENDING (1 << 0)
-#define CURSOR_UPDATE_TAKEN (1 << 1)
-#define CURSOR_UPDATE_LOCK (1 << 16)
-#define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-#define SI_CRTC0_REGISTER_OFFSET 0
-#define SI_CRTC1_REGISTER_OFFSET 0x300
-#define SI_CRTC2_REGISTER_OFFSET 0x2600
-#define SI_CRTC3_REGISTER_OFFSET 0x2900
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00
-
-#define DMA0_REGISTER_OFFSET 0x000
-#define DMA1_REGISTER_OFFSET 0x200
-#define ES_AND_GS_AUTO 3
-#define RADEON_PACKET_TYPE3 3
-#define CE_PARTITION_BASE 3
-#define BUF_SWAP_32BIT (2 << 16)
#define GFX_POWER_STATUS (1 << 1)
#define GFX_CLOCK_STATUS (1 << 2)
#define GFX_LS_STATUS (1 << 3)
-#define RLC_BUSY_STATUS (1 << 0)
+#define RLC_BUSY_STATUS (1 << 0)
#define RLC_PUD(x) ((x) << 0)
#define RLC_PUD_MASK (0xff << 0)
#define RLC_PDD(x) ((x) << 8)
@@ -148,144 +40,8 @@
#define RLC_TTPD_MASK (0xff << 16)
#define RLC_MSD(x) ((x) << 24)
#define RLC_MSD_MASK (0xff << 24)
-#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
-#define WRITE_DATA_DST_SEL(x) ((x) << 8)
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
-#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
-#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
-#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
-#define GFX6_NUM_GFX_RINGS 1
-#define GFX6_NUM_COMPUTE_RINGS 2
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003
-
-#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
- (((op) & 0xFF) << 8) | \
- ((n) & 0x3FFF) << 16)
-#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
-#define PACKET3_NOP 0x10
-#define PACKET3_SET_BASE 0x11
-#define PACKET3_BASE_INDEX(x) ((x) << 0)
-#define PACKET3_CLEAR_STATE 0x12
-#define PACKET3_INDEX_BUFFER_SIZE 0x13
-#define PACKET3_DISPATCH_DIRECT 0x15
-#define PACKET3_DISPATCH_INDIRECT 0x16
-#define PACKET3_ALLOC_GDS 0x1B
-#define PACKET3_WRITE_GDS_RAM 0x1C
-#define PACKET3_ATOMIC_GDS 0x1D
-#define PACKET3_ATOMIC 0x1E
-#define PACKET3_OCCLUSION_QUERY 0x1F
-#define PACKET3_SET_PREDICATION 0x20
-#define PACKET3_REG_RMW 0x21
-#define PACKET3_COND_EXEC 0x22
-#define PACKET3_PRED_EXEC 0x23
-#define PACKET3_DRAW_INDIRECT 0x24
-#define PACKET3_DRAW_INDEX_INDIRECT 0x25
-#define PACKET3_INDEX_BASE 0x26
-#define PACKET3_DRAW_INDEX_2 0x27
-#define PACKET3_CONTEXT_CONTROL 0x28
-#define PACKET3_INDEX_TYPE 0x2A
-#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
-#define PACKET3_DRAW_INDEX_AUTO 0x2D
-#define PACKET3_DRAW_INDEX_IMMD 0x2E
-#define PACKET3_NUM_INSTANCES 0x2F
-#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PACKET3_INDIRECT_BUFFER_CONST 0x31
-#define PACKET3_INDIRECT_BUFFER 0x3F
-#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
-#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
-#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
-#define PACKET3_WRITE_DATA 0x37
-#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PACKET3_MEM_SEMAPHORE 0x39
-#define PACKET3_MPEG_INDEX 0x3A
-#define PACKET3_COPY_DW 0x3B
-#define PACKET3_WAIT_REG_MEM 0x3C
-#define PACKET3_MEM_WRITE 0x3D
-#define PACKET3_COPY_DATA 0x40
-#define PACKET3_CP_DMA 0x41
-# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
-# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
-# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
-# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
-# define PACKET3_CP_DMA_DIS_WC (1 << 21)
-# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
-# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
-# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
-# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
-# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
-# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
-# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
-#define PACKET3_PFP_SYNC_ME 0x42
-#define PACKET3_SURFACE_SYNC 0x43
-# define PACKET3_DEST_BASE_0_ENA (1 << 0)
-# define PACKET3_DEST_BASE_1_ENA (1 << 1)
-# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
-# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
-# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
-# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
-# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
-# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
-# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
-# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
-# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
-# define PACKET3_DEST_BASE_2_ENA (1 << 19)
-# define PACKET3_DEST_BASE_3_ENA (1 << 21)
-# define PACKET3_TCL1_ACTION_ENA (1 << 22)
-# define PACKET3_TC_ACTION_ENA (1 << 23)
-# define PACKET3_CB_ACTION_ENA (1 << 25)
-# define PACKET3_DB_ACTION_ENA (1 << 26)
-# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
-# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
-#define PACKET3_ME_INITIALIZE 0x44
-#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
-#define PACKET3_COND_WRITE 0x45
-#define PACKET3_EVENT_WRITE 0x46
-#define PACKET3_EVENT_WRITE_EOP 0x47
-#define PACKET3_EVENT_WRITE_EOS 0x48
-#define PACKET3_PREAMBLE_CNTL 0x4A
-# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
-# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
-#define PACKET3_ONE_REG_WRITE 0x57
-#define PACKET3_LOAD_CONFIG_REG 0x5F
-#define PACKET3_LOAD_CONTEXT_REG 0x60
-#define PACKET3_LOAD_SH_REG 0x61
-#define PACKET3_SET_CONFIG_REG 0x68
-#define PACKET3_SET_CONFIG_REG_START 0x00002000
-#define PACKET3_SET_CONFIG_REG_END 0x00002c00
-#define PACKET3_SET_CONTEXT_REG 0x69
-#define PACKET3_SET_CONTEXT_REG_START 0x000a000
-#define PACKET3_SET_CONTEXT_REG_END 0x000a400
-#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
-#define PACKET3_SET_RESOURCE_INDIRECT 0x74
-#define PACKET3_SET_SH_REG 0x76
-#define PACKET3_SET_SH_REG_START 0x00002c00
-#define PACKET3_SET_SH_REG_END 0x00003000
-#define PACKET3_SET_SH_REG_OFFSET 0x77
-#define PACKET3_ME_WRITE 0x7A
-#define PACKET3_SCRATCH_RAM_WRITE 0x7D
-#define PACKET3_SCRATCH_RAM_READ 0x7E
-#define PACKET3_CE_WRITE 0x7F
-#define PACKET3_LOAD_CONST_RAM 0x80
-#define PACKET3_WRITE_CONST_RAM 0x81
-#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
-#define PACKET3_DUMP_CONST_RAM 0x83
-#define PACKET3_INCREMENT_CE_COUNTER 0x84
-#define PACKET3_INCREMENT_DE_COUNTER 0x85
-#define PACKET3_WAIT_ON_CE_COUNTER 0x86
-#define PACKET3_WAIT_ON_DE_COUNTER 0x87
-#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
-#define PACKET3_SET_CE_DE_COUNTERS 0x89
-#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
-#define PACKET3_SWITCH_BUFFER 0x8B
-#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
-#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
-#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 5237395e4fab..1df00f8a2406 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -27,6 +27,7 @@
#include "amdgpu_ih.h"
#include "sid.h"
#include "si_ih.h"
+
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"
@@ -156,19 +157,19 @@ static void si_ih_set_rptr(struct amdgpu_device *adev,
WREG32(IH_RB_RPTR, ih->rptr);
}
-static int si_ih_early_init(void *handle)
+static int si_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
si_ih_set_interrupt_funcs(adev);
return 0;
}
-static int si_ih_sw_init(void *handle)
+static int si_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
@@ -177,49 +178,43 @@ static int si_ih_sw_init(void *handle)
return amdgpu_irq_init(adev);
}
-static int si_ih_sw_fini(void *handle)
+static int si_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int si_ih_hw_init(void *handle)
+static int si_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return si_ih_irq_init(adev);
}
-static int si_ih_hw_fini(void *handle)
+static int si_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- si_ih_irq_disable(adev);
+ si_ih_irq_disable(ip_block->adev);
return 0;
}
-static int si_ih_suspend(void *handle)
+static int si_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_fini(adev);
+ return si_ih_hw_fini(ip_block);
}
-static int si_ih_resume(void *handle)
+static int si_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return si_ih_hw_init(adev);
+ return si_ih_hw_init(ip_block);
}
-static bool si_ih_is_idle(void *handle)
+static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(SRBM_STATUS);
+ struct amdgpu_device *adev = ip_block->adev;
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
return false;
@@ -227,41 +222,41 @@ static bool si_ih_is_idle(void *handle)
return true;
}
-static int si_ih_wait_for_idle(void *handle)
+static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (si_ih_is_idle(handle))
+ if (si_ih_is_idle(ip_block))
return 0;
udelay(1);
}
return -ETIMEDOUT;
}
-static int si_ih_soft_reset(void *handle)
+static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(SRBM_STATUS);
+ u32 tmp = RREG32(mmSRBM_STATUS);
if (tmp & SRBM_STATUS__IH_BUSY_MASK)
srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
if (srbm_soft_reset) {
- tmp = RREG32(SRBM_SOFT_RESET);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ dev_info(adev->dev, "mmSRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
tmp &= ~srbm_soft_reset;
- WREG32(SRBM_SOFT_RESET, tmp);
- tmp = RREG32(SRBM_SOFT_RESET);
+ WREG32(mmSRBM_SOFT_RESET, tmp);
+ tmp = RREG32(mmSRBM_SOFT_RESET);
udelay(50);
}
@@ -269,13 +264,13 @@ static int si_ih_soft_reset(void *handle)
return 0;
}
-static int si_ih_set_clockgating_state(void *handle,
+static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int si_ih_set_powergating_state(void *handle,
+static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -284,7 +279,6 @@ static int si_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs si_ih_ip_funcs = {
.name = "si_ih",
.early_init = si_ih_early_init,
- .late_init = NULL,
.sw_init = si_ih_sw_init,
.sw_fini = si_ih_sw_fini,
.hw_init = si_ih_hw_init,
@@ -296,8 +290,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
.soft_reset = si_ih_soft_reset,
.set_clockgating_state = si_ih_set_clockgating_state,
.set_powergating_state = si_ih_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs si_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index 9a39cbfe6db9..cbd4f8951cfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -24,47 +24,12 @@
#ifndef SI_H
#define SI_H
-#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2
-
-#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
-
-#define SI_MAX_SH_GPRS 256
-#define SI_MAX_TEMP_GPRS 16
-#define SI_MAX_SH_THREADS 256
-#define SI_MAX_SH_STACK_ENTRIES 4096
-#define SI_MAX_FRC_EOV_CNT 16384
-#define SI_MAX_BACKENDS 8
-#define SI_MAX_BACKENDS_MASK 0xFF
-#define SI_MAX_BACKENDS_PER_SE_MASK 0x0F
-#define SI_MAX_SIMDS 12
-#define SI_MAX_SIMDS_MASK 0x0FFF
-#define SI_MAX_SIMDS_PER_SE_MASK 0x00FF
-#define SI_MAX_PIPES 8
-#define SI_MAX_PIPES_MASK 0xFF
-#define SI_MAX_PIPES_PER_SIMD_MASK 0x3F
-#define SI_MAX_LDS_NUM 0xFFFF
-#define SI_MAX_TCC 16
-#define SI_MAX_TCC_MASK 0xFFFF
#define SI_MAX_CTLACKS_ASSERTION_WAIT 100
-/* SMC IND accessor regs */
-#define SMC_IND_INDEX_0 0x80
-#define SMC_IND_DATA_0 0x81
-
-#define SMC_IND_ACCESS_CNTL 0x8A
-# define AUTO_INCREMENT_IND_0 (1 << 0)
-#define SMC_MESSAGE_0 0x8B
-#define SMC_RESP_0 0x8C
-
/* CG IND registers are accessed via SMC indirect space + SMC_CG_IND_START */
#define SMC_CG_IND_START 0xc0030000
#define SMC_CG_IND_END 0xc0040000
-#define CG_CGTT_LOCAL_0 0x400
-#define CG_CGTT_LOCAL_1 0x401
-
/* SMC IND registers */
#define SMC_SYSCON_RESET_CNTL 0x80000000
# define RST_REG (1 << 0)
@@ -72,9 +37,6 @@
# define CK_DISABLE (1 << 0)
# define CKEN (1 << 24)
-#define VGA_HDP_CONTROL 0xCA
-#define VGA_MEMORY_DISABLE (1 << 4)
-
#define DCCG_DISP_SLOW_SELECT_REG 0x13F
#define DCCG_DISP1_SLOW_SELECT(x) ((x) << 0)
#define DCCG_DISP1_SLOW_SELECT_MASK (7 << 0)
@@ -83,47 +45,6 @@
#define DCCG_DISP2_SLOW_SELECT_MASK (7 << 4)
#define DCCG_DISP2_SLOW_SELECT_SHIFT 4
-#define CG_SPLL_FUNC_CNTL 0x180
-#define SPLL_RESET (1 << 0)
-#define SPLL_SLEEP (1 << 1)
-#define SPLL_BYPASS_EN (1 << 3)
-#define SPLL_REF_DIV(x) ((x) << 4)
-#define SPLL_REF_DIV_MASK (0x3f << 4)
-#define SPLL_PDIV_A(x) ((x) << 20)
-#define SPLL_PDIV_A_MASK (0x7f << 20)
-#define SPLL_PDIV_A_SHIFT 20
-#define CG_SPLL_FUNC_CNTL_2 0x181
-#define SCLK_MUX_SEL(x) ((x) << 0)
-#define SCLK_MUX_SEL_MASK (0x1ff << 0)
-#define SPLL_CTLREQ_CHG (1 << 23)
-#define SCLK_MUX_UPDATE (1 << 26)
-#define CG_SPLL_FUNC_CNTL_3 0x182
-#define SPLL_FB_DIV(x) ((x) << 0)
-#define SPLL_FB_DIV_MASK (0x3ffffff << 0)
-#define SPLL_FB_DIV_SHIFT 0
-#define SPLL_DITHEN (1 << 28)
-#define CG_SPLL_FUNC_CNTL_4 0x183
-
-#define SPLL_STATUS 0x185
-#define SPLL_CHG_STATUS (1 << 1)
-#define SPLL_CNTL_MODE 0x186
-#define SPLL_SW_DIR_CONTROL (1 << 0)
-# define SPLL_REFCLK_SEL(x) ((x) << 26)
-# define SPLL_REFCLK_SEL_MASK (3 << 26)
-
-#define CG_SPLL_SPREAD_SPECTRUM 0x188
-#define SSEN (1 << 0)
-#define CLK_S(x) ((x) << 4)
-#define CLK_S_MASK (0xfff << 4)
-#define CLK_S_SHIFT 4
-#define CG_SPLL_SPREAD_SPECTRUM_2 0x189
-#define CLK_V(x) ((x) << 0)
-#define CLK_V_MASK (0x3ffffff << 0)
-#define CLK_V_SHIFT 0
-
-#define CG_SPLL_AUTOSCALE_CNTL 0x18b
-# define AUTOSCALE_ON_SS_CLEAR (1 << 9)
-
/* discrete uvd clocks */
#define CG_UPLL_FUNC_CNTL 0x18d
# define UPLL_RESET_MASK 0x00000001
@@ -153,317 +74,13 @@
#define CG_UPLL_SPREAD_SPECTRUM 0x194
# define SSEN_MASK 0x00000001
-#define MPLL_BYPASSCLK_SEL 0x197
-# define MPLL_CLKOUT_SEL(x) ((x) << 8)
-# define MPLL_CLKOUT_SEL_MASK 0xFF00
-
-#define CG_CLKPIN_CNTL 0x198
-# define XTALIN_DIVIDE (1 << 1)
-# define BCLK_AS_XCLK (1 << 2)
-#define CG_CLKPIN_CNTL_2 0x199
-# define FORCE_BIF_REFCLK_EN (1 << 3)
-# define MUX_TCLK_TO_XCLK (1 << 8)
-
-#define THM_CLK_CNTL 0x19b
-# define CMON_CLK_SEL(x) ((x) << 0)
-# define CMON_CLK_SEL_MASK 0xFF
-# define TMON_CLK_SEL(x) ((x) << 8)
-# define TMON_CLK_SEL_MASK 0xFF00
-#define MISC_CLK_CNTL 0x19c
-# define DEEP_SLEEP_CLK_SEL(x) ((x) << 0)
-# define DEEP_SLEEP_CLK_SEL_MASK 0xFF
-# define ZCLK_SEL(x) ((x) << 8)
-# define ZCLK_SEL_MASK 0xFF00
-
-#define CG_THERMAL_CTRL 0x1c0
-#define DPM_EVENT_SRC(x) ((x) << 0)
-#define DPM_EVENT_SRC_MASK (7 << 0)
-#define DIG_THERM_DPM(x) ((x) << 14)
-#define DIG_THERM_DPM_MASK 0x003FC000
-#define DIG_THERM_DPM_SHIFT 14
-#define CG_THERMAL_STATUS 0x1c1
-#define FDO_PWM_DUTY(x) ((x) << 9)
-#define FDO_PWM_DUTY_MASK (0xff << 9)
-#define FDO_PWM_DUTY_SHIFT 9
-#define CG_THERMAL_INT 0x1c2
-#define DIG_THERM_INTH(x) ((x) << 8)
-#define DIG_THERM_INTH_MASK 0x0000FF00
-#define DIG_THERM_INTH_SHIFT 8
-#define DIG_THERM_INTL(x) ((x) << 16)
-#define DIG_THERM_INTL_MASK 0x00FF0000
-#define DIG_THERM_INTL_SHIFT 16
-#define THERM_INT_MASK_HIGH (1 << 24)
-#define THERM_INT_MASK_LOW (1 << 25)
-
-#define CG_MULT_THERMAL_CTRL 0x1c4
-#define TEMP_SEL(x) ((x) << 20)
-#define TEMP_SEL_MASK (0xff << 20)
-#define TEMP_SEL_SHIFT 20
-#define CG_MULT_THERMAL_STATUS 0x1c5
-#define ASIC_MAX_TEMP(x) ((x) << 0)
-#define ASIC_MAX_TEMP_MASK 0x000001ff
-#define ASIC_MAX_TEMP_SHIFT 0
-#define CTF_TEMP(x) ((x) << 9)
-#define CTF_TEMP_MASK 0x0003fe00
-#define CTF_TEMP_SHIFT 9
-
-#define CG_FDO_CTRL0 0x1d5
-#define FDO_STATIC_DUTY(x) ((x) << 0)
-#define FDO_STATIC_DUTY_MASK 0x000000FF
-#define FDO_STATIC_DUTY_SHIFT 0
-#define CG_FDO_CTRL1 0x1d6
-#define FMAX_DUTY100(x) ((x) << 0)
-#define FMAX_DUTY100_MASK 0x000000FF
-#define FMAX_DUTY100_SHIFT 0
-#define CG_FDO_CTRL2 0x1d7
-#define TMIN(x) ((x) << 0)
-#define TMIN_MASK 0x000000FF
-#define TMIN_SHIFT 0
-#define FDO_PWM_MODE(x) ((x) << 11)
-#define FDO_PWM_MODE_MASK (7 << 11)
-#define FDO_PWM_MODE_SHIFT 11
-#define TACH_PWM_RESP_RATE(x) ((x) << 25)
-#define TACH_PWM_RESP_RATE_MASK (0x7f << 25)
-#define TACH_PWM_RESP_RATE_SHIFT 25
-
-#define CG_TACH_CTRL 0x1dc
-# define EDGE_PER_REV(x) ((x) << 0)
-# define EDGE_PER_REV_MASK (0x7 << 0)
-# define EDGE_PER_REV_SHIFT 0
-# define TARGET_PERIOD(x) ((x) << 3)
-# define TARGET_PERIOD_MASK 0xfffffff8
-# define TARGET_PERIOD_SHIFT 3
-#define CG_TACH_STATUS 0x1dd
-# define TACH_PERIOD(x) ((x) << 0)
-# define TACH_PERIOD_MASK 0xffffffff
-# define TACH_PERIOD_SHIFT 0
-
-#define GENERAL_PWRMGT 0x1e0
-# define GLOBAL_PWRMGT_EN (1 << 0)
-# define STATIC_PM_EN (1 << 1)
-# define THERMAL_PROTECTION_DIS (1 << 2)
-# define THERMAL_PROTECTION_TYPE (1 << 3)
-# define SW_SMIO_INDEX(x) ((x) << 6)
-# define SW_SMIO_INDEX_MASK (1 << 6)
-# define SW_SMIO_INDEX_SHIFT 6
-# define VOLT_PWRMGT_EN (1 << 10)
-# define DYN_SPREAD_SPECTRUM_EN (1 << 23)
-#define CG_TPC 0x1e1
-#define SCLK_PWRMGT_CNTL 0x1e2
-# define SCLK_PWRMGT_OFF (1 << 0)
-# define SCLK_LOW_D1 (1 << 1)
-# define FIR_RESET (1 << 4)
-# define FIR_FORCE_TREND_SEL (1 << 5)
-# define FIR_TREND_MODE (1 << 6)
-# define DYN_GFX_CLK_OFF_EN (1 << 7)
-# define GFX_CLK_FORCE_ON (1 << 8)
-# define GFX_CLK_REQUEST_OFF (1 << 9)
-# define GFX_CLK_FORCE_OFF (1 << 10)
-# define GFX_CLK_OFF_ACPI_D1 (1 << 11)
-# define GFX_CLK_OFF_ACPI_D2 (1 << 12)
-# define GFX_CLK_OFF_ACPI_D3 (1 << 13)
-# define DYN_LIGHT_SLEEP_EN (1 << 14)
-
-#define TARGET_AND_CURRENT_PROFILE_INDEX 0x1e6
-# define CURRENT_STATE_INDEX_MASK (0xf << 4)
-# define CURRENT_STATE_INDEX_SHIFT 4
-
-#define CG_FTV 0x1ef
-
-#define CG_FFCT_0 0x1f0
-# define UTC_0(x) ((x) << 0)
-# define UTC_0_MASK (0x3ff << 0)
-# define DTC_0(x) ((x) << 10)
-# define DTC_0_MASK (0x3ff << 10)
-
-#define CG_BSP 0x1ff
-# define BSP(x) ((x) << 0)
-# define BSP_MASK (0xffff << 0)
-# define BSU(x) ((x) << 16)
-# define BSU_MASK (0xf << 16)
-#define CG_AT 0x200
-# define CG_R(x) ((x) << 0)
-# define CG_R_MASK (0xffff << 0)
-# define CG_L(x) ((x) << 16)
-# define CG_L_MASK (0xffff << 16)
-
-#define CG_GIT 0x201
-# define CG_GICST(x) ((x) << 0)
-# define CG_GICST_MASK (0xffff << 0)
-# define CG_GIPOT(x) ((x) << 16)
-# define CG_GIPOT_MASK (0xffff << 16)
-
-#define CG_SSP 0x203
-# define SST(x) ((x) << 0)
-# define SST_MASK (0xffff << 0)
-# define SSTU(x) ((x) << 16)
-# define SSTU_MASK (0xf << 16)
-
-#define CG_DISPLAY_GAP_CNTL 0x20a
-# define DISP1_GAP(x) ((x) << 0)
-# define DISP1_GAP_MASK (3 << 0)
-# define DISP2_GAP(x) ((x) << 2)
-# define DISP2_GAP_MASK (3 << 2)
-# define VBI_TIMER_COUNT(x) ((x) << 4)
-# define VBI_TIMER_COUNT_MASK (0x3fff << 4)
-# define VBI_TIMER_UNIT(x) ((x) << 20)
-# define VBI_TIMER_UNIT_MASK (7 << 20)
-# define DISP1_GAP_MCHG(x) ((x) << 24)
-# define DISP1_GAP_MCHG_MASK (3 << 24)
-# define DISP2_GAP_MCHG(x) ((x) << 26)
-# define DISP2_GAP_MCHG_MASK (3 << 26)
-
-#define CG_ULV_CONTROL 0x21e
-#define CG_ULV_PARAMETER 0x21f
-
-#define SMC_SCRATCH0 0x221
-
-#define CG_CAC_CTRL 0x22e
-# define CAC_WINDOW(x) ((x) << 0)
-# define CAC_WINDOW_MASK 0x00ffffff
-
-#define DMIF_ADDR_CONFIG 0x2F5
-
-#define DMIF_ADDR_CALC 0x300
-
-#define PIPE0_DMIF_BUFFER_CONTROL 0x0328
-# define DMIF_BUFFERS_ALLOCATED(x) ((x) << 0)
-# define DMIF_BUFFERS_ALLOCATED_COMPLETED (1 << 4)
-
-#define SRBM_STATUS 0x394
-#define GRBM_RQ_PENDING (1 << 5)
-#define VMC_BUSY (1 << 8)
-#define MCB_BUSY (1 << 9)
-#define MCB_NON_DISPLAY_BUSY (1 << 10)
-#define MCC_BUSY (1 << 11)
-#define MCD_BUSY (1 << 12)
-#define SEM_BUSY (1 << 14)
-#define IH_BUSY (1 << 17)
-
-#define SRBM_SOFT_RESET 0x398
-#define SOFT_RESET_BIF (1 << 1)
-#define SOFT_RESET_DC (1 << 5)
-#define SOFT_RESET_DMA1 (1 << 6)
-#define SOFT_RESET_GRBM (1 << 8)
-#define SOFT_RESET_HDP (1 << 9)
-#define SOFT_RESET_IH (1 << 10)
-#define SOFT_RESET_MC (1 << 11)
-#define SOFT_RESET_ROM (1 << 14)
-#define SOFT_RESET_SEM (1 << 15)
-#define SOFT_RESET_VMC (1 << 17)
-#define SOFT_RESET_DMA (1 << 20)
-#define SOFT_RESET_TST (1 << 21)
-#define SOFT_RESET_REGBB (1 << 22)
-#define SOFT_RESET_ORB (1 << 23)
-
-#define CC_SYS_RB_BACKEND_DISABLE 0x3A0
-#define GC_USER_SYS_RB_BACKEND_DISABLE 0x3A1
-
-#define SRBM_READ_ERROR 0x3A6
-#define SRBM_INT_CNTL 0x3A8
-#define SRBM_INT_ACK 0x3AA
-
-#define SRBM_STATUS2 0x3B1
-#define DMA_BUSY (1 << 5)
-#define DMA1_BUSY (1 << 6)
-
-#define VM_L2_CNTL 0x500
-#define ENABLE_L2_CACHE (1 << 0)
-#define ENABLE_L2_FRAGMENT_PROCESSING (1 << 1)
-#define L2_CACHE_PTE_ENDIAN_SWAP_MODE(x) ((x) << 2)
-#define L2_CACHE_PDE_ENDIAN_SWAP_MODE(x) ((x) << 4)
-#define ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE (1 << 9)
-#define ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE (1 << 10)
-#define EFFECTIVE_L2_QUEUE_SIZE(x) (((x) & 7) << 15)
-#define CONTEXT1_IDENTITY_ACCESS_MODE(x) (((x) & 3) << 19)
-#define VM_L2_CNTL2 0x501
-#define INVALIDATE_ALL_L1_TLBS (1 << 0)
-#define INVALIDATE_L2_CACHE (1 << 1)
-#define INVALIDATE_CACHE_MODE(x) ((x) << 26)
-#define INVALIDATE_PTE_AND_PDE_CACHES 0
-#define INVALIDATE_ONLY_PTE_CACHES 1
-#define INVALIDATE_ONLY_PDE_CACHES 2
-#define VM_L2_CNTL3 0x502
-#define BANK_SELECT(x) ((x) << 0)
-#define L2_CACHE_UPDATE_MODE(x) ((x) << 6)
-#define L2_CACHE_BIGK_FRAGMENT_SIZE(x) ((x) << 15)
-#define L2_CACHE_BIGK_ASSOCIATIVITY (1 << 20)
-#define VM_L2_STATUS 0x503
-#define L2_BUSY (1 << 0)
-#define VM_CONTEXT0_CNTL 0x504
-#define ENABLE_CONTEXT (1 << 0)
-#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
-#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
-#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
-#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
-#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
-#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
-#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
-#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
-#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
-#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
-#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
-#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
-#define PAGE_TABLE_BLOCK_SIZE(x) (((x) & 0xF) << 24)
-#define VM_CONTEXT1_CNTL 0x505
-#define VM_CONTEXT0_CNTL2 0x50C
-#define VM_CONTEXT1_CNTL2 0x50D
-#define VM_CONTEXT8_PAGE_TABLE_BASE_ADDR 0x50E
-#define VM_CONTEXT9_PAGE_TABLE_BASE_ADDR 0x50F
-#define VM_CONTEXT10_PAGE_TABLE_BASE_ADDR 0x510
-#define VM_CONTEXT11_PAGE_TABLE_BASE_ADDR 0x511
-#define VM_CONTEXT12_PAGE_TABLE_BASE_ADDR 0x512
-#define VM_CONTEXT13_PAGE_TABLE_BASE_ADDR 0x513
-#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x514
-#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x515
-
-#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x53f
-#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x537
-#define PROTECTIONS_MASK (0xf << 0)
-#define PROTECTIONS_SHIFT 0
- /* bit 0: range
- * bit 1: pde0
- * bit 2: valid
- * bit 3: read
- * bit 4: write
- */
-#define MEMORY_CLIENT_ID_MASK (0xff << 12)
-#define MEMORY_CLIENT_ID_SHIFT 12
-#define MEMORY_CLIENT_RW_MASK (1 << 24)
-#define MEMORY_CLIENT_RW_SHIFT 24
-#define FAULT_VMID_MASK (0xf << 25)
-#define FAULT_VMID_SHIFT 25
-
#define VM_INVALIDATE_REQUEST 0x51E
#define VM_INVALIDATE_RESPONSE 0x51F
-#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR 0x546
-#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR 0x547
-
-#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x54F
-#define VM_CONTEXT1_PAGE_TABLE_BASE_ADDR 0x550
-#define VM_CONTEXT2_PAGE_TABLE_BASE_ADDR 0x551
-#define VM_CONTEXT3_PAGE_TABLE_BASE_ADDR 0x552
-#define VM_CONTEXT4_PAGE_TABLE_BASE_ADDR 0x553
-#define VM_CONTEXT5_PAGE_TABLE_BASE_ADDR 0x554
-#define VM_CONTEXT6_PAGE_TABLE_BASE_ADDR 0x555
-#define VM_CONTEXT7_PAGE_TABLE_BASE_ADDR 0x556
-#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x557
-#define VM_CONTEXT1_PAGE_TABLE_START_ADDR 0x558
-
-#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x55F
-#define VM_CONTEXT1_PAGE_TABLE_END_ADDR 0x560
-
#define VM_L2_CG 0x570
#define MC_CG_ENABLE (1 << 18)
#define MC_LS_ENABLE (1 << 19)
-#define MC_SHARED_CHMAP 0x801
-#define NOOFCHAN_SHIFT 12
-#define NOOFCHAN_MASK 0x0000f000
-#define MC_SHARED_CHREMAP 0x802
-
#define MC_VM_FB_LOCATION 0x809
#define MC_VM_AGP_TOP 0x80A
#define MC_VM_AGP_BOT 0x80B
@@ -495,21 +112,6 @@
#define MC_CITF_MISC_WR_CG 0x993
#define MC_CITF_MISC_VM_CG 0x994
-#define MC_ARB_RAMCFG 0x9D8
-#define NOOFBANK_SHIFT 0
-#define NOOFBANK_MASK 0x00000003
-#define NOOFRANK_SHIFT 2
-#define NOOFRANK_MASK 0x00000004
-#define NOOFROWS_SHIFT 3
-#define NOOFROWS_MASK 0x00000038
-#define NOOFCOLS_SHIFT 6
-#define NOOFCOLS_MASK 0x000000C0
-#define CHANSIZE_SHIFT 8
-#define CHANSIZE_MASK 0x00000100
-#define CHANSIZE_OVERRIDE (1 << 11)
-#define NOOFGROUPS_SHIFT 12
-#define NOOFGROUPS_MASK 0x00001000
-
#define MC_ARB_DRAM_TIMING 0x9DD
#define MC_ARB_DRAM_TIMING2 0x9DE
@@ -635,20 +237,6 @@
#define CLKS(x) ((x) << 0)
#define CLKS_MASK (0xfff << 0)
-#define HDP_HOST_PATH_CNTL 0xB00
-#define CLOCK_GATING_DIS (1 << 23)
-#define HDP_NONSURFACE_BASE 0xB01
-#define HDP_NONSURFACE_INFO 0xB02
-#define HDP_NONSURFACE_SIZE 0xB03
-
-#define HDP_DEBUG0 0xBCC
-
-#define HDP_ADDR_CONFIG 0xBD2
-#define HDP_MISC_CNTL 0xBD3
-#define HDP_FLUSH_INVALIDATE_CACHE (1 << 0)
-#define HDP_MEM_POWER_LS 0xBD4
-#define HDP_LS_ENABLE (1 << 0)
-
#define ATC_MISC_CG 0xCD4
#define IH_RB_CNTL 0xF80
@@ -678,8 +266,6 @@
# define MC_WR_CLEAN_CNT(x) ((x) << 20)
# define MC_VMID(x) ((x) << 25)
-#define CONFIG_MEMSIZE 0x150A
-
#define INTERRUPT_CNTL 0x151A
# define IH_DUMMY_RD_OVERRIDE (1 << 0)
# define IH_DUMMY_RD_EN (1 << 1)
@@ -687,734 +273,28 @@
# define GEN_IH_INT_EN (1 << 8)
#define INTERRUPT_CNTL2 0x151B
-#define HDP_MEM_COHERENCY_FLUSH_CNTL 0x1520
-
-#define BIF_FB_EN 0x1524
-#define FB_READ_EN (1 << 0)
-#define FB_WRITE_EN (1 << 1)
-
-#define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528
-
-/* DCE6 ELD audio interface */
-#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780
-# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0)
-# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8)
-#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781
-
-#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25
-#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0)
-#define SPEAKER_ALLOCATION_MASK (0x7f << 0)
-#define SPEAKER_ALLOCATION_SHIFT 0
-#define HDMI_CONNECTION (1 << 16)
-#define DP_CONNECTION (1 << 17)
-
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3 0x2B /* MP3 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4 0x2C /* MPEG2 */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5 0x2D /* AAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6 0x2E /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7 0x2F /* ATRAC */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8 0x30 /* one bit audio - leave at 0 (default) */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9 0x31 /* Dolby Digital */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10 0x32 /* DTS-HD */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11 0x33 /* MAT-MLP */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12 0x34 /* DTS */
-#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13 0x35 /* WMA Pro */
-# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
-/* max channels minus one. 7 = 8 channels */
-# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
-# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
-# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
-/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
- * bit0 = 32 kHz
- * bit1 = 44.1 kHz
- * bit2 = 48 kHz
- * bit3 = 88.2 kHz
- * bit4 = 96 kHz
- * bit5 = 176.4 kHz
- * bit6 = 192 kHz
- */
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC 0x37
-# define VIDEO_LIPSYNC(x) (((x) & 0xff) << 0)
-# define AUDIO_LIPSYNC(x) (((x) & 0xff) << 8)
-/* VIDEO_LIPSYNC, AUDIO_LIPSYNC
- * 0 = invalid
- * x = legal delay value
- * 255 = sync not supported
- */
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_HBR 0x38
-# define HBR_CAPABLE (1 << 0) /* enabled by default */
-
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO0 0x3a
-# define MANUFACTURER_ID(x) (((x) & 0xffff) << 0)
-# define PRODUCT_ID(x) (((x) & 0xffff) << 16)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO1 0x3b
-# define SINK_DESCRIPTION_LEN(x) (((x) & 0xff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO2 0x3c
-# define PORT_ID0(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO3 0x3d
-# define PORT_ID1(x) (((x) & 0xffffffff) << 0)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO4 0x3e
-# define DESCRIPTION0(x) (((x) & 0xff) << 0)
-# define DESCRIPTION1(x) (((x) & 0xff) << 8)
-# define DESCRIPTION2(x) (((x) & 0xff) << 16)
-# define DESCRIPTION3(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO5 0x3f
-# define DESCRIPTION4(x) (((x) & 0xff) << 0)
-# define DESCRIPTION5(x) (((x) & 0xff) << 8)
-# define DESCRIPTION6(x) (((x) & 0xff) << 16)
-# define DESCRIPTION7(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO6 0x40
-# define DESCRIPTION8(x) (((x) & 0xff) << 0)
-# define DESCRIPTION9(x) (((x) & 0xff) << 8)
-# define DESCRIPTION10(x) (((x) & 0xff) << 16)
-# define DESCRIPTION11(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO7 0x41
-# define DESCRIPTION12(x) (((x) & 0xff) << 0)
-# define DESCRIPTION13(x) (((x) & 0xff) << 8)
-# define DESCRIPTION14(x) (((x) & 0xff) << 16)
-# define DESCRIPTION15(x) (((x) & 0xff) << 24)
-#define AZ_F0_CODEC_PIN_CONTROL_SINK_INFO8 0x42
-# define DESCRIPTION16(x) (((x) & 0xff) << 0)
-# define DESCRIPTION17(x) (((x) & 0xff) << 8)
-
-#define AZ_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL 0x54
-# define AUDIO_ENABLED (1 << 31)
-
-#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT 0x56
-#define PORT_CONNECTIVITY_MASK (3 << 30)
-#define PORT_CONNECTIVITY_SHIFT 30
-
-#define DC_LB_MEMORY_SPLIT 0x1AC3
-#define DC_LB_MEMORY_CONFIG(x) ((x) << 20)
-
-#define PRIORITY_A_CNT 0x1AC6
-#define PRIORITY_MARK_MASK 0x7fff
-#define PRIORITY_OFF (1 << 16)
-#define PRIORITY_ALWAYS_ON (1 << 20)
-#define PRIORITY_B_CNT 0x1AC7
-
-#define DPG_PIPE_ARBITRATION_CONTROL3 0x1B32
-# define LATENCY_WATERMARK_MASK(x) ((x) << 16)
-#define DPG_PIPE_LATENCY_CONTROL 0x1B33
-# define LATENCY_LOW_WATERMARK(x) ((x) << 0)
-# define LATENCY_HIGH_WATERMARK(x) ((x) << 16)
-
-/* 0x6bb8, 0x77b8, 0x103b8, 0x10fb8, 0x11bb8, 0x127b8 */
-#define VLINE_STATUS 0x1AEE
-# define VLINE_OCCURRED (1 << 0)
-# define VLINE_ACK (1 << 4)
-# define VLINE_STAT (1 << 12)
-# define VLINE_INTERRUPT (1 << 16)
-# define VLINE_INTERRUPT_TYPE (1 << 17)
-/* 0x6bbc, 0x77bc, 0x103bc, 0x10fbc, 0x11bbc, 0x127bc */
-#define VBLANK_STATUS 0x1AEF
-# define VBLANK_OCCURRED (1 << 0)
-# define VBLANK_ACK (1 << 4)
-# define VBLANK_STAT (1 << 12)
-# define VBLANK_INTERRUPT (1 << 16)
-# define VBLANK_INTERRUPT_TYPE (1 << 17)
-
-/* 0x6b40, 0x7740, 0x10340, 0x10f40, 0x11b40, 0x12740 */
-#define INT_MASK 0x1AD0
-# define VBLANK_INT_MASK (1 << 0)
-# define VLINE_INT_MASK (1 << 4)
-
-#define DISP_INTERRUPT_STATUS 0x183D
-# define LB_D1_VLINE_INTERRUPT (1 << 2)
-# define LB_D1_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD1_INTERRUPT (1 << 17)
-# define DC_HPD1_RX_INTERRUPT (1 << 18)
-# define DACA_AUTODETECT_INTERRUPT (1 << 22)
-# define DACB_AUTODETECT_INTERRUPT (1 << 23)
-# define DC_I2C_SW_DONE_INTERRUPT (1 << 24)
-# define DC_I2C_HW_DONE_INTERRUPT (1 << 25)
-#define DISP_INTERRUPT_STATUS_CONTINUE 0x183E
-# define LB_D2_VLINE_INTERRUPT (1 << 2)
-# define LB_D2_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD2_INTERRUPT (1 << 17)
-# define DC_HPD2_RX_INTERRUPT (1 << 18)
-# define DISP_TIMER_INTERRUPT (1 << 24)
-#define DISP_INTERRUPT_STATUS_CONTINUE2 0x183F
-# define LB_D3_VLINE_INTERRUPT (1 << 2)
-# define LB_D3_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD3_INTERRUPT (1 << 17)
-# define DC_HPD3_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE3 0x1840
-# define LB_D4_VLINE_INTERRUPT (1 << 2)
-# define LB_D4_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD4_INTERRUPT (1 << 17)
-# define DC_HPD4_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE4 0x1853
-# define LB_D5_VLINE_INTERRUPT (1 << 2)
-# define LB_D5_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD5_INTERRUPT (1 << 17)
-# define DC_HPD5_RX_INTERRUPT (1 << 18)
-#define DISP_INTERRUPT_STATUS_CONTINUE5 0x1854
-# define LB_D6_VLINE_INTERRUPT (1 << 2)
-# define LB_D6_VBLANK_INTERRUPT (1 << 3)
-# define DC_HPD6_INTERRUPT (1 << 17)
-# define DC_HPD6_RX_INTERRUPT (1 << 18)
-
-/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */
-#define GRPH_INT_STATUS 0x1A16
-# define GRPH_PFLIP_INT_OCCURRED (1 << 0)
-# define GRPH_PFLIP_INT_CLEAR (1 << 8)
-/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */
-#define GRPH_INT_CONTROL 0x1A17
-# define GRPH_PFLIP_INT_MASK (1 << 0)
-# define GRPH_PFLIP_INT_TYPE (1 << 8)
-
-#define DAC_AUTODETECT_INT_CONTROL 0x19F2
-
-#define DC_HPD1_INT_STATUS 0x1807
-#define DC_HPD2_INT_STATUS 0x180A
-#define DC_HPD3_INT_STATUS 0x180D
-#define DC_HPD4_INT_STATUS 0x1810
-#define DC_HPD5_INT_STATUS 0x1813
-#define DC_HPD6_INT_STATUS 0x1816
-# define DC_HPDx_INT_STATUS (1 << 0)
-# define DC_HPDx_SENSE (1 << 1)
-# define DC_HPDx_RX_INT_STATUS (1 << 8)
-
-#define DC_HPD1_INT_CONTROL 0x1808
-#define DC_HPD2_INT_CONTROL 0x180B
-#define DC_HPD3_INT_CONTROL 0x180E
-#define DC_HPD4_INT_CONTROL 0x1811
-#define DC_HPD5_INT_CONTROL 0x1814
-#define DC_HPD6_INT_CONTROL 0x1817
-# define DC_HPDx_INT_ACK (1 << 0)
-# define DC_HPDx_INT_POLARITY (1 << 8)
-# define DC_HPDx_INT_EN (1 << 16)
-# define DC_HPDx_RX_INT_ACK (1 << 20)
-# define DC_HPDx_RX_INT_EN (1 << 24)
-
-#define DC_HPD1_CONTROL 0x1809
-#define DC_HPD2_CONTROL 0x180C
-#define DC_HPD3_CONTROL 0x180F
-#define DC_HPD4_CONTROL 0x1812
-#define DC_HPD5_CONTROL 0x1815
-#define DC_HPD6_CONTROL 0x1818
-# define DC_HPDx_CONNECTION_TIMER(x) ((x) << 0)
-# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
-# define DC_HPDx_EN (1 << 28)
-
-#define DPG_PIPE_STUTTER_CONTROL 0x1B35
-# define STUTTER_ENABLE (1 << 0)
-
-/* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
-#define CRTC_STATUS_FRAME_COUNT 0x1BA6
-
-/* Audio clocks */
-#define DCCG_AUDIO_DTO_SOURCE 0x05ac
-# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */
-# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */
-
-#define DCCG_AUDIO_DTO0_PHASE 0x05b0
-#define DCCG_AUDIO_DTO0_MODULE 0x05b4
-#define DCCG_AUDIO_DTO1_PHASE 0x05c0
-#define DCCG_AUDIO_DTO1_MODULE 0x05c4
-
-#define AFMT_AUDIO_SRC_CONTROL 0x1c4f
-#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0)
-/* AFMT_AUDIO_SRC_SELECT
- * 0 = stream0
- * 1 = stream1
- * 2 = stream2
- * 3 = stream3
- * 4 = stream4
- * 5 = stream5
- */
-
-#define GRBM_CNTL 0x2000
-#define GRBM_READ_TIMEOUT(x) ((x) << 0)
-
-#define GRBM_STATUS2 0x2002
-#define RLC_RQ_PENDING (1 << 0)
-#define RLC_BUSY (1 << 8)
-#define TC_BUSY (1 << 9)
-
-#define GRBM_STATUS 0x2004
-#define CMDFIFO_AVAIL_MASK 0x0000000F
-#define RING2_RQ_PENDING (1 << 4)
-#define SRBM_RQ_PENDING (1 << 5)
-#define RING1_RQ_PENDING (1 << 6)
-#define CF_RQ_PENDING (1 << 7)
-#define PF_RQ_PENDING (1 << 8)
-#define GDS_DMA_RQ_PENDING (1 << 9)
-#define GRBM_EE_BUSY (1 << 10)
-#define DB_CLEAN (1 << 12)
-#define CB_CLEAN (1 << 13)
-#define TA_BUSY (1 << 14)
-#define GDS_BUSY (1 << 15)
-#define VGT_BUSY (1 << 17)
-#define IA_BUSY_NO_DMA (1 << 18)
-#define IA_BUSY (1 << 19)
-#define SX_BUSY (1 << 20)
-#define SPI_BUSY (1 << 22)
-#define BCI_BUSY (1 << 23)
-#define SC_BUSY (1 << 24)
-#define PA_BUSY (1 << 25)
-#define DB_BUSY (1 << 26)
-#define CP_COHERENCY_BUSY (1 << 28)
-#define CP_BUSY (1 << 29)
-#define CB_BUSY (1 << 30)
-#define GUI_ACTIVE (1 << 31)
-#define GRBM_STATUS_SE0 0x2005
-#define GRBM_STATUS_SE1 0x2006
-#define SE_DB_CLEAN (1 << 1)
-#define SE_CB_CLEAN (1 << 2)
-#define SE_BCI_BUSY (1 << 22)
-#define SE_VGT_BUSY (1 << 23)
-#define SE_PA_BUSY (1 << 24)
-#define SE_TA_BUSY (1 << 25)
-#define SE_SX_BUSY (1 << 26)
-#define SE_SPI_BUSY (1 << 27)
-#define SE_SC_BUSY (1 << 29)
-#define SE_DB_BUSY (1 << 30)
-#define SE_CB_BUSY (1 << 31)
-
-#define GRBM_SOFT_RESET 0x2008
-#define SOFT_RESET_CP (1 << 0)
-#define SOFT_RESET_CB (1 << 1)
-#define SOFT_RESET_RLC (1 << 2)
-#define SOFT_RESET_DB (1 << 3)
-#define SOFT_RESET_GDS (1 << 4)
-#define SOFT_RESET_PA (1 << 5)
-#define SOFT_RESET_SC (1 << 6)
-#define SOFT_RESET_BCI (1 << 7)
-#define SOFT_RESET_SPI (1 << 8)
-#define SOFT_RESET_SX (1 << 10)
-#define SOFT_RESET_TC (1 << 11)
-#define SOFT_RESET_TA (1 << 12)
-#define SOFT_RESET_VGT (1 << 14)
-#define SOFT_RESET_IA (1 << 15)
-
-#define GRBM_GFX_INDEX 0x200B
-#define INSTANCE_INDEX(x) ((x) << 0)
-#define SH_INDEX(x) ((x) << 8)
-#define SE_INDEX(x) ((x) << 16)
-#define SH_BROADCAST_WRITES (1 << 29)
-#define INSTANCE_BROADCAST_WRITES (1 << 30)
-#define SE_BROADCAST_WRITES (1 << 31)
-
-#define GRBM_INT_CNTL 0x2018
-# define RDERR_INT_ENABLE (1 << 0)
-# define GUI_IDLE_INT_ENABLE (1 << 19)
-
-#define CP_STRMOUT_CNTL 0x213F
-#define SCRATCH_REG0 0x2140
-#define SCRATCH_REG1 0x2141
-#define SCRATCH_REG2 0x2142
-#define SCRATCH_REG3 0x2143
-#define SCRATCH_REG4 0x2144
-#define SCRATCH_REG5 0x2145
-#define SCRATCH_REG6 0x2146
-#define SCRATCH_REG7 0x2147
-
-#define SCRATCH_UMSK 0x2150
-#define SCRATCH_ADDR 0x2151
-
-#define CP_SEM_WAIT_TIMER 0x216F
-
-#define CP_SEM_INCOMPLETE_TIMER_CNTL 0x2172
-
-#define CP_ME_CNTL 0x21B6
-#define CP_CE_HALT (1 << 24)
-#define CP_PFP_HALT (1 << 26)
-#define CP_ME_HALT (1 << 28)
-
-#define CP_COHER_CNTL2 0x217A
-
-#define CP_RB2_RPTR 0x21BE
-#define CP_RB1_RPTR 0x21BF
-#define CP_RB0_RPTR 0x21C0
-#define CP_RB_WPTR_DELAY 0x21C1
-
-#define CP_QUEUE_THRESHOLDS 0x21D8
-#define ROQ_IB1_START(x) ((x) << 0)
-#define ROQ_IB2_START(x) ((x) << 8)
-#define CP_MEQ_THRESHOLDS 0x21D9
-#define MEQ1_START(x) ((x) << 0)
-#define MEQ2_START(x) ((x) << 8)
-
-#define CP_PERFMON_CNTL 0x21FF
-
#define VGT_VTX_VECT_EJECT_REG 0x222C
-
-#define VGT_CACHE_INVALIDATION 0x2231
-#define CACHE_INVALIDATION(x) ((x) << 0)
-#define VC_ONLY 0
-#define TC_ONLY 1
-#define VC_AND_TC 2
-#define AUTO_INVLD_EN(x) ((x) << 6)
-#define NO_AUTO 0
-#define ES_AUTO 1
-#define GS_AUTO 2
-#define ES_AND_GS_AUTO 3
#define VGT_ESGS_RING_SIZE 0x2232
#define VGT_GSVS_RING_SIZE 0x2233
-
#define VGT_GS_VERTEX_REUSE 0x2235
-
#define VGT_PRIMITIVE_TYPE 0x2256
#define VGT_INDEX_TYPE 0x2257
-
#define VGT_NUM_INDICES 0x225C
#define VGT_NUM_INSTANCES 0x225D
-
#define VGT_TF_RING_SIZE 0x2262
-
#define VGT_HS_OFFCHIP_PARAM 0x226C
-
#define VGT_TF_MEMORY_BASE 0x226E
-#define CC_GC_SHADER_ARRAY_CONFIG 0x226F
-#define INACTIVE_CUS_MASK 0xFFFF0000
-#define INACTIVE_CUS_SHIFT 16
-#define GC_USER_SHADER_ARRAY_CONFIG 0x2270
-
-#define PA_CL_ENHANCE 0x2285
-#define CLIP_VTX_REORDER_ENA (1 << 0)
-#define NUM_CLIP_SEQ(x) ((x) << 1)
-
-#define PA_SU_LINE_STIPPLE_VALUE 0x2298
-
-#define PA_SC_LINE_STIPPLE_STATE 0x22C4
-
-#define PA_SC_FORCE_EOV_MAX_CNTS 0x22C9
-#define FORCE_EOV_MAX_CLK_CNT(x) ((x) << 0)
-#define FORCE_EOV_MAX_REZ_CNT(x) ((x) << 16)
-
-#define PA_SC_FIFO_SIZE 0x22F3
-#define SC_FRONTEND_PRIM_FIFO_SIZE(x) ((x) << 0)
-#define SC_BACKEND_PRIM_FIFO_SIZE(x) ((x) << 6)
-#define SC_HIZ_TILE_FIFO_SIZE(x) ((x) << 15)
-#define SC_EARLYZ_TILE_FIFO_SIZE(x) ((x) << 23)
-
#define PA_SC_ENHANCE 0x22FC
-#define SQ_CONFIG 0x2300
-
-#define SQC_CACHES 0x2302
-
-#define SQ_POWER_THROTTLE 0x2396
-#define MIN_POWER(x) ((x) << 0)
-#define MIN_POWER_MASK (0x3fff << 0)
-#define MIN_POWER_SHIFT 0
-#define MAX_POWER(x) ((x) << 16)
-#define MAX_POWER_MASK (0x3fff << 16)
-#define MAX_POWER_SHIFT 0
-#define SQ_POWER_THROTTLE2 0x2397
-#define MAX_POWER_DELTA(x) ((x) << 0)
-#define MAX_POWER_DELTA_MASK (0x3fff << 0)
-#define MAX_POWER_DELTA_SHIFT 0
-#define STI_SIZE(x) ((x) << 16)
-#define STI_SIZE_MASK (0x3ff << 16)
-#define STI_SIZE_SHIFT 16
-#define LTI_RATIO(x) ((x) << 27)
-#define LTI_RATIO_MASK (0xf << 27)
-#define LTI_RATIO_SHIFT 27
-
-#define SX_DEBUG_1 0x2418
-
-#define SPI_STATIC_THREAD_MGMT_1 0x2438
-#define SPI_STATIC_THREAD_MGMT_2 0x2439
-#define SPI_STATIC_THREAD_MGMT_3 0x243A
-#define SPI_PS_MAX_WAVE_ID 0x243B
-
-#define SPI_CONFIG_CNTL 0x2440
-
-#define SPI_CONFIG_CNTL_1 0x244F
-#define VTX_DONE_DELAY(x) ((x) << 0)
-#define INTERP_ONE_PRIM_PER_ROW (1 << 4)
-
-#define CGTS_TCC_DISABLE 0x2452
-#define CGTS_USER_TCC_DISABLE 0x2453
-#define TCC_DISABLE_MASK 0xFFFF0000
-#define TCC_DISABLE_SHIFT 16
-#define CGTS_SM_CTRL_REG 0x2454
-#define OVERRIDE (1 << 21)
-#define LS_OVERRIDE (1 << 22)
-
-#define SPI_LB_CU_MASK 0x24D5
-
#define TA_CNTL_AUX 0x2542
-#define CC_RB_BACKEND_DISABLE 0x263D
-#define BACKEND_DISABLE(x) ((x) << 16)
-#define GB_ADDR_CONFIG 0x263E
-#define NUM_PIPES(x) ((x) << 0)
-#define NUM_PIPES_MASK 0x00000007
-#define NUM_PIPES_SHIFT 0
-#define PIPE_INTERLEAVE_SIZE(x) ((x) << 4)
-#define PIPE_INTERLEAVE_SIZE_MASK 0x00000070
-#define PIPE_INTERLEAVE_SIZE_SHIFT 4
-#define NUM_SHADER_ENGINES(x) ((x) << 12)
-#define NUM_SHADER_ENGINES_MASK 0x00003000
-#define NUM_SHADER_ENGINES_SHIFT 12
-#define SHADER_ENGINE_TILE_SIZE(x) ((x) << 16)
-#define SHADER_ENGINE_TILE_SIZE_MASK 0x00070000
-#define SHADER_ENGINE_TILE_SIZE_SHIFT 16
-#define NUM_GPUS(x) ((x) << 20)
-#define NUM_GPUS_MASK 0x00700000
-#define NUM_GPUS_SHIFT 20
-#define MULTI_GPU_TILE_SIZE(x) ((x) << 24)
-#define MULTI_GPU_TILE_SIZE_MASK 0x03000000
-#define MULTI_GPU_TILE_SIZE_SHIFT 24
-#define ROW_SIZE(x) ((x) << 28)
-#define ROW_SIZE_MASK 0x30000000
-#define ROW_SIZE_SHIFT 28
-
-#define GB_TILE_MODE0 0x2644
-# define MICRO_TILE_MODE(x) ((x) << 0)
-# define ADDR_SURF_DISPLAY_MICRO_TILING 0
-# define ADDR_SURF_THIN_MICRO_TILING 1
-# define ADDR_SURF_DEPTH_MICRO_TILING 2
-# define ARRAY_MODE(x) ((x) << 2)
-# define ARRAY_LINEAR_GENERAL 0
-# define ARRAY_LINEAR_ALIGNED 1
-# define ARRAY_1D_TILED_THIN1 2
-# define ARRAY_2D_TILED_THIN1 4
-# define PIPE_CONFIG(x) ((x) << 6)
-# define ADDR_SURF_P2 0
-# define ADDR_SURF_P4_8x16 4
-# define ADDR_SURF_P4_16x16 5
-# define ADDR_SURF_P4_16x32 6
-# define ADDR_SURF_P4_32x32 7
-# define ADDR_SURF_P8_16x16_8x16 8
-# define ADDR_SURF_P8_16x32_8x16 9
-# define ADDR_SURF_P8_32x32_8x16 10
-# define ADDR_SURF_P8_16x32_16x16 11
-# define ADDR_SURF_P8_32x32_16x16 12
-# define ADDR_SURF_P8_32x32_16x32 13
-# define ADDR_SURF_P8_32x64_32x32 14
-# define TILE_SPLIT(x) ((x) << 11)
-# define ADDR_SURF_TILE_SPLIT_64B 0
-# define ADDR_SURF_TILE_SPLIT_128B 1
-# define ADDR_SURF_TILE_SPLIT_256B 2
-# define ADDR_SURF_TILE_SPLIT_512B 3
-# define ADDR_SURF_TILE_SPLIT_1KB 4
-# define ADDR_SURF_TILE_SPLIT_2KB 5
-# define ADDR_SURF_TILE_SPLIT_4KB 6
-# define BANK_WIDTH(x) ((x) << 14)
-# define ADDR_SURF_BANK_WIDTH_1 0
-# define ADDR_SURF_BANK_WIDTH_2 1
-# define ADDR_SURF_BANK_WIDTH_4 2
-# define ADDR_SURF_BANK_WIDTH_8 3
-# define BANK_HEIGHT(x) ((x) << 16)
-# define ADDR_SURF_BANK_HEIGHT_1 0
-# define ADDR_SURF_BANK_HEIGHT_2 1
-# define ADDR_SURF_BANK_HEIGHT_4 2
-# define ADDR_SURF_BANK_HEIGHT_8 3
-# define MACRO_TILE_ASPECT(x) ((x) << 18)
-# define ADDR_SURF_MACRO_ASPECT_1 0
-# define ADDR_SURF_MACRO_ASPECT_2 1
-# define ADDR_SURF_MACRO_ASPECT_4 2
-# define ADDR_SURF_MACRO_ASPECT_8 3
-# define NUM_BANKS(x) ((x) << 20)
-# define ADDR_SURF_2_BANK 0
-# define ADDR_SURF_4_BANK 1
-# define ADDR_SURF_8_BANK 2
-# define ADDR_SURF_16_BANK 3
-#define GB_TILE_MODE1 0x2645
-#define GB_TILE_MODE2 0x2646
-#define GB_TILE_MODE3 0x2647
-#define GB_TILE_MODE4 0x2648
-#define GB_TILE_MODE5 0x2649
-#define GB_TILE_MODE6 0x264a
-#define GB_TILE_MODE7 0x264b
-#define GB_TILE_MODE8 0x264c
-#define GB_TILE_MODE9 0x264d
-#define GB_TILE_MODE10 0x264e
-#define GB_TILE_MODE11 0x264f
-#define GB_TILE_MODE12 0x2650
-#define GB_TILE_MODE13 0x2651
-#define GB_TILE_MODE14 0x2652
-#define GB_TILE_MODE15 0x2653
-#define GB_TILE_MODE16 0x2654
-#define GB_TILE_MODE17 0x2655
-#define GB_TILE_MODE18 0x2656
-#define GB_TILE_MODE19 0x2657
-#define GB_TILE_MODE20 0x2658
-#define GB_TILE_MODE21 0x2659
-#define GB_TILE_MODE22 0x265a
-#define GB_TILE_MODE23 0x265b
-#define GB_TILE_MODE24 0x265c
-#define GB_TILE_MODE25 0x265d
-#define GB_TILE_MODE26 0x265e
-#define GB_TILE_MODE27 0x265f
-#define GB_TILE_MODE28 0x2660
-#define GB_TILE_MODE29 0x2661
-#define GB_TILE_MODE30 0x2662
-#define GB_TILE_MODE31 0x2663
-
-#define CB_PERFCOUNTER0_SELECT0 0x2688
-#define CB_PERFCOUNTER0_SELECT1 0x2689
-#define CB_PERFCOUNTER1_SELECT0 0x268A
-#define CB_PERFCOUNTER1_SELECT1 0x268B
-#define CB_PERFCOUNTER2_SELECT0 0x268C
-#define CB_PERFCOUNTER2_SELECT1 0x268D
-#define CB_PERFCOUNTER3_SELECT0 0x268E
-#define CB_PERFCOUNTER3_SELECT1 0x268F
-
-#define CB_CGTT_SCLK_CTRL 0x2698
-
-#define GC_USER_RB_BACKEND_DISABLE 0x26DF
-#define BACKEND_DISABLE_MASK 0x00FF0000
-#define BACKEND_DISABLE_SHIFT 16
-
-#define TCP_CHAN_STEER_LO 0x2B03
-#define TCP_CHAN_STEER_HI 0x2B94
-
-#define CP_RB0_BASE 0x3040
-#define CP_RB0_CNTL 0x3041
-#define RB_BUFSZ(x) ((x) << 0)
-#define RB_BLKSZ(x) ((x) << 8)
-#define BUF_SWAP_32BIT (2 << 16)
-#define RB_NO_UPDATE (1 << 27)
-#define RB_RPTR_WR_ENA (1 << 31)
-
-#define CP_RB0_RPTR_ADDR 0x3043
-#define CP_RB0_RPTR_ADDR_HI 0x3044
-#define CP_RB0_WPTR 0x3045
-
-#define CP_PFP_UCODE_ADDR 0x3054
-#define CP_PFP_UCODE_DATA 0x3055
-#define CP_ME_RAM_RADDR 0x3056
-#define CP_ME_RAM_WADDR 0x3057
-#define CP_ME_RAM_DATA 0x3058
-
-#define CP_CE_UCODE_ADDR 0x305A
-#define CP_CE_UCODE_DATA 0x305B
-
-#define CP_RB1_BASE 0x3060
-#define CP_RB1_CNTL 0x3061
-#define CP_RB1_RPTR_ADDR 0x3062
-#define CP_RB1_RPTR_ADDR_HI 0x3063
-#define CP_RB1_WPTR 0x3064
-#define CP_RB2_BASE 0x3065
-#define CP_RB2_CNTL 0x3066
-#define CP_RB2_RPTR_ADDR 0x3067
-#define CP_RB2_RPTR_ADDR_HI 0x3068
-#define CP_RB2_WPTR 0x3069
-#define CP_INT_CNTL_RING0 0x306A
-#define CP_INT_CNTL_RING1 0x306B
-#define CP_INT_CNTL_RING2 0x306C
-# define CNTX_BUSY_INT_ENABLE (1 << 19)
-# define CNTX_EMPTY_INT_ENABLE (1 << 20)
-# define WAIT_MEM_SEM_INT_ENABLE (1 << 21)
-# define TIME_STAMP_INT_ENABLE (1 << 26)
-# define CP_RINGID2_INT_ENABLE (1 << 29)
-# define CP_RINGID1_INT_ENABLE (1 << 30)
-# define CP_RINGID0_INT_ENABLE (1 << 31)
-#define CP_INT_STATUS_RING0 0x306D
-#define CP_INT_STATUS_RING1 0x306E
-#define CP_INT_STATUS_RING2 0x306F
-# define WAIT_MEM_SEM_INT_STAT (1 << 21)
-# define TIME_STAMP_INT_STAT (1 << 26)
-# define CP_RINGID2_INT_STAT (1 << 29)
-# define CP_RINGID1_INT_STAT (1 << 30)
-# define CP_RINGID0_INT_STAT (1 << 31)
-
-#define CP_MEM_SLP_CNTL 0x3079
-# define CP_MEM_LS_EN (1 << 0)
-
-#define CP_DEBUG 0x307F
-
-#define RLC_CNTL 0x30C0
-# define RLC_ENABLE (1 << 0)
-#define RLC_RL_BASE 0x30C1
-#define RLC_RL_SIZE 0x30C2
-#define RLC_LB_CNTL 0x30C3
-# define LOAD_BALANCE_ENABLE (1 << 0)
-#define RLC_SAVE_AND_RESTORE_BASE 0x30C4
-#define RLC_LB_CNTR_MAX 0x30C5
-#define RLC_LB_CNTR_INIT 0x30C6
-
-#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8
-
-#define RLC_UCODE_ADDR 0x30CB
-#define RLC_UCODE_DATA 0x30CC
-
-#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE
-#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF
-#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0
-#define RLC_MC_CNTL 0x30D1
-#define RLC_UCODE_CNTL 0x30D2
-#define RLC_STAT 0x30D3
-# define RLC_BUSY_STATUS (1 << 0)
-# define GFX_POWER_STATUS (1 << 1)
-# define GFX_CLOCK_STATUS (1 << 2)
-# define GFX_LS_STATUS (1 << 3)
-
-#define RLC_PG_CNTL 0x30D7
-# define GFX_PG_ENABLE (1 << 0)
-# define GFX_PG_SRC (1 << 1)
-
-#define RLC_CGTT_MGCG_OVERRIDE 0x3100
-#define RLC_CGCG_CGLS_CTRL 0x3101
-# define CGCG_EN (1 << 0)
-# define CGLS_EN (1 << 1)
-
-#define RLC_TTOP_D 0x3105
-# define RLC_PUD(x) ((x) << 0)
-# define RLC_PUD_MASK (0xff << 0)
-# define RLC_PDD(x) ((x) << 8)
-# define RLC_PDD_MASK (0xff << 8)
-# define RLC_TTPD(x) ((x) << 16)
-# define RLC_TTPD_MASK (0xff << 16)
-# define RLC_MSD(x) ((x) << 24)
-# define RLC_MSD_MASK (0xff << 24)
-
-#define RLC_LB_INIT_CU_MASK 0x3107
-
-#define RLC_PG_AO_CU_MASK 0x310B
-#define RLC_MAX_PG_CU 0x310C
-# define MAX_PU_CU(x) ((x) << 0)
-# define MAX_PU_CU_MASK (0xff << 0)
-#define RLC_AUTO_PG_CTRL 0x310C
-# define AUTO_PG_EN (1 << 0)
-# define GRBM_REG_SGIT(x) ((x) << 3)
-# define GRBM_REG_SGIT_MASK (0xffff << 3)
-# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19)
-# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19)
-
-#define RLC_SERDES_WR_MASTER_MASK_0 0x3115
-#define RLC_SERDES_WR_MASTER_MASK_1 0x3116
-#define RLC_SERDES_WR_CTRL 0x3117
-
-#define RLC_SERDES_MASTER_BUSY_0 0x3119
-#define RLC_SERDES_MASTER_BUSY_1 0x311A
-
-#define RLC_GCPM_GENERAL_3 0x311E
-
-#define DB_RENDER_CONTROL 0xA000
-
-#define DB_DEPTH_INFO 0xA00F
-
-#define PA_SC_RASTER_CONFIG 0xA0D4
-# define RB_MAP_PKR0(x) ((x) << 0)
-# define RB_MAP_PKR0_MASK (0x3 << 0)
-# define RB_MAP_PKR1(x) ((x) << 2)
-# define RB_MAP_PKR1_MASK (0x3 << 2)
-# define RASTER_CONFIG_RB_MAP_0 0
-# define RASTER_CONFIG_RB_MAP_1 1
-# define RASTER_CONFIG_RB_MAP_2 2
-# define RASTER_CONFIG_RB_MAP_3 3
+// #define PA_SC_RASTER_CONFIG 0xA0D4
# define RB_XSEL2(x) ((x) << 4)
# define RB_XSEL2_MASK (0x3 << 4)
# define RB_XSEL (1 << 6)
# define RB_YSEL (1 << 7)
# define PKR_MAP(x) ((x) << 8)
-# define PKR_MAP_MASK (0x3 << 8)
-# define RASTER_CONFIG_PKR_MAP_0 0
-# define RASTER_CONFIG_PKR_MAP_1 1
-# define RASTER_CONFIG_PKR_MAP_2 2
-# define RASTER_CONFIG_PKR_MAP_3 3
# define PKR_XSEL(x) ((x) << 10)
# define PKR_XSEL_MASK (0x3 << 10)
# define PKR_YSEL(x) ((x) << 12)
@@ -1426,221 +306,19 @@
# define SC_YSEL(x) ((x) << 20)
# define SC_YSEL_MASK (0x3 << 20)
# define SE_MAP(x) ((x) << 24)
-# define SE_MAP_MASK (0x3 << 24)
-# define RASTER_CONFIG_SE_MAP_0 0
-# define RASTER_CONFIG_SE_MAP_1 1
-# define RASTER_CONFIG_SE_MAP_2 2
-# define RASTER_CONFIG_SE_MAP_3 3
# define SE_XSEL(x) ((x) << 26)
# define SE_XSEL_MASK (0x3 << 26)
# define SE_YSEL(x) ((x) << 28)
# define SE_YSEL_MASK (0x3 << 28)
-
-#define VGT_EVENT_INITIATOR 0xA2A4
-# define SAMPLE_STREAMOUTSTATS1 (1 << 0)
-# define SAMPLE_STREAMOUTSTATS2 (2 << 0)
-# define SAMPLE_STREAMOUTSTATS3 (3 << 0)
-# define CACHE_FLUSH_TS (4 << 0)
-# define CACHE_FLUSH (6 << 0)
-# define CS_PARTIAL_FLUSH (7 << 0)
-# define VGT_STREAMOUT_RESET (10 << 0)
-# define END_OF_PIPE_INCR_DE (11 << 0)
-# define END_OF_PIPE_IB_END (12 << 0)
-# define RST_PIX_CNT (13 << 0)
-# define VS_PARTIAL_FLUSH (15 << 0)
-# define PS_PARTIAL_FLUSH (16 << 0)
-# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0)
-# define ZPASS_DONE (21 << 0)
-# define CACHE_FLUSH_AND_INV_EVENT (22 << 0)
-# define PERFCOUNTER_START (23 << 0)
-# define PERFCOUNTER_STOP (24 << 0)
-# define PIPELINESTAT_START (25 << 0)
-# define PIPELINESTAT_STOP (26 << 0)
-# define PERFCOUNTER_SAMPLE (27 << 0)
-# define SAMPLE_PIPELINESTAT (30 << 0)
-# define SAMPLE_STREAMOUTSTATS (32 << 0)
-# define RESET_VTX_CNT (33 << 0)
-# define VGT_FLUSH (36 << 0)
-# define BOTTOM_OF_PIPE_TS (40 << 0)
-# define DB_CACHE_FLUSH_AND_INV (42 << 0)
-# define FLUSH_AND_INV_DB_DATA_TS (43 << 0)
-# define FLUSH_AND_INV_DB_META (44 << 0)
-# define FLUSH_AND_INV_CB_DATA_TS (45 << 0)
-# define FLUSH_AND_INV_CB_META (46 << 0)
-# define CS_DONE (47 << 0)
-# define PS_DONE (48 << 0)
-# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0)
-# define THREAD_TRACE_START (51 << 0)
-# define THREAD_TRACE_STOP (52 << 0)
-# define THREAD_TRACE_FLUSH (54 << 0)
-# define THREAD_TRACE_FINISH (55 << 0)
-
-/* PIF PHY0 registers idx/data 0x8/0xc */
-#define PB0_PIF_CNTL 0x10
-# define LS2_EXIT_TIME(x) ((x) << 17)
-# define LS2_EXIT_TIME_MASK (0x7 << 17)
-# define LS2_EXIT_TIME_SHIFT 17
-#define PB0_PIF_PAIRING 0x11
-# define MULTI_PIF (1 << 25)
-#define PB0_PIF_PWRDOWN_0 0x12
-# define PLL_POWER_STATE_IN_TXS2_0(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_0_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_0(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_0_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_0_SHIFT 10
-# define PLL_RAMP_UP_TIME_0(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_0_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_0_SHIFT 24
-#define PB0_PIF_PWRDOWN_1 0x13
-# define PLL_POWER_STATE_IN_TXS2_1(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_1_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_1(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_1_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_1_SHIFT 10
-# define PLL_RAMP_UP_TIME_1(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_1_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_1_SHIFT 24
-
-#define PB0_PIF_PWRDOWN_2 0x17
-# define PLL_POWER_STATE_IN_TXS2_2(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_2_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_2(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_2_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_2_SHIFT 10
-# define PLL_RAMP_UP_TIME_2(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_2_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_2_SHIFT 24
-#define PB0_PIF_PWRDOWN_3 0x18
-# define PLL_POWER_STATE_IN_TXS2_3(x) ((x) << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_MASK (0x7 << 7)
-# define PLL_POWER_STATE_IN_TXS2_3_SHIFT 7
-# define PLL_POWER_STATE_IN_OFF_3(x) ((x) << 10)
-# define PLL_POWER_STATE_IN_OFF_3_MASK (0x7 << 10)
-# define PLL_POWER_STATE_IN_OFF_3_SHIFT 10
-# define PLL_RAMP_UP_TIME_3(x) ((x) << 24)
-# define PLL_RAMP_UP_TIME_3_MASK (0x7 << 24)
-# define PLL_RAMP_UP_TIME_3_SHIFT 24
-/* PIF PHY1 registers idx/data 0x10/0x14 */
-#define PB1_PIF_CNTL 0x10
-#define PB1_PIF_PAIRING 0x11
-#define PB1_PIF_PWRDOWN_0 0x12
-#define PB1_PIF_PWRDOWN_1 0x13
-
-#define PB1_PIF_PWRDOWN_2 0x17
-#define PB1_PIF_PWRDOWN_3 0x18
-/* PCIE registers idx/data 0x30/0x34 */
-#define PCIE_CNTL2 0x1c /* PCIE */
-# define SLV_MEM_LS_EN (1 << 16)
-# define SLV_MEM_AGGRESSIVE_LS_EN (1 << 17)
-# define MST_MEM_LS_EN (1 << 18)
-# define REPLAY_MEM_LS_EN (1 << 19)
-#define PCIE_LC_STATUS1 0x28 /* PCIE */
-# define LC_REVERSE_RCVR (1 << 0)
-# define LC_REVERSE_XMIT (1 << 1)
-# define LC_OPERATING_LINK_WIDTH_MASK (0x7 << 2)
-# define LC_OPERATING_LINK_WIDTH_SHIFT 2
-# define LC_DETECTED_LINK_WIDTH_MASK (0x7 << 5)
-# define LC_DETECTED_LINK_WIDTH_SHIFT 5
-
-#define PCIE_P_CNTL 0x40 /* PCIE */
-# define P_IGNORE_EDB_ERR (1 << 6)
-
/* PCIE PORT registers idx/data 0x38/0x3c */
-#define PCIE_LC_CNTL 0xa0
-# define LC_L0S_INACTIVITY(x) ((x) << 8)
-# define LC_L0S_INACTIVITY_MASK (0xf << 8)
-# define LC_L0S_INACTIVITY_SHIFT 8
-# define LC_L1_INACTIVITY(x) ((x) << 12)
-# define LC_L1_INACTIVITY_MASK (0xf << 12)
-# define LC_L1_INACTIVITY_SHIFT 12
-# define LC_PMI_TO_L1_DIS (1 << 16)
-# define LC_ASPM_TO_L1_DIS (1 << 24)
-#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
-# define LC_LINK_WIDTH_SHIFT 0
-# define LC_LINK_WIDTH_MASK 0x7
+// #define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
# define LC_LINK_WIDTH_X0 0
# define LC_LINK_WIDTH_X1 1
# define LC_LINK_WIDTH_X2 2
# define LC_LINK_WIDTH_X4 3
# define LC_LINK_WIDTH_X8 4
# define LC_LINK_WIDTH_X16 6
-# define LC_LINK_WIDTH_RD_SHIFT 4
-# define LC_LINK_WIDTH_RD_MASK 0x70
-# define LC_RECONFIG_ARC_MISSING_ESCAPE (1 << 7)
-# define LC_RECONFIG_NOW (1 << 8)
-# define LC_RENEGOTIATION_SUPPORT (1 << 9)
-# define LC_RENEGOTIATE_EN (1 << 10)
-# define LC_SHORT_RECONFIG_EN (1 << 11)
-# define LC_UPCONFIGURE_SUPPORT (1 << 12)
-# define LC_UPCONFIGURE_DIS (1 << 13)
-# define LC_DYN_LANES_PWR_STATE(x) ((x) << 21)
-# define LC_DYN_LANES_PWR_STATE_MASK (0x3 << 21)
-# define LC_DYN_LANES_PWR_STATE_SHIFT 21
-#define PCIE_LC_N_FTS_CNTL 0xa3 /* PCIE_P */
-# define LC_XMIT_N_FTS(x) ((x) << 0)
-# define LC_XMIT_N_FTS_MASK (0xff << 0)
-# define LC_XMIT_N_FTS_SHIFT 0
-# define LC_XMIT_N_FTS_OVERRIDE_EN (1 << 8)
-# define LC_N_FTS_MASK (0xff << 24)
-#define PCIE_LC_SPEED_CNTL 0xa4 /* PCIE_P */
-# define LC_GEN2_EN_STRAP (1 << 0)
-# define LC_GEN3_EN_STRAP (1 << 1)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_EN (1 << 2)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_MASK (0x3 << 3)
-# define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT 3
-# define LC_FORCE_EN_SW_SPEED_CHANGE (1 << 5)
-# define LC_FORCE_DIS_SW_SPEED_CHANGE (1 << 6)
-# define LC_FORCE_EN_HW_SPEED_CHANGE (1 << 7)
-# define LC_FORCE_DIS_HW_SPEED_CHANGE (1 << 8)
-# define LC_INITIATE_LINK_SPEED_CHANGE (1 << 9)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK (0x3 << 10)
-# define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT 10
-# define LC_CURRENT_DATA_RATE_MASK (0x3 << 13) /* 0/1/2 = gen1/2/3 */
-# define LC_CURRENT_DATA_RATE_SHIFT 13
-# define LC_CLR_FAILED_SPD_CHANGE_CNT (1 << 16)
-# define LC_OTHER_SIDE_EVER_SENT_GEN2 (1 << 18)
-# define LC_OTHER_SIDE_SUPPORTS_GEN2 (1 << 19)
-# define LC_OTHER_SIDE_EVER_SENT_GEN3 (1 << 20)
-# define LC_OTHER_SIDE_SUPPORTS_GEN3 (1 << 21)
-
-#define PCIE_LC_CNTL2 0xb1
-# define LC_ALLOW_PDWN_IN_L1 (1 << 17)
-# define LC_ALLOW_PDWN_IN_L23 (1 << 18)
-
-#define PCIE_LC_CNTL3 0xb5 /* PCIE_P */
-# define LC_GO_TO_RECOVERY (1 << 30)
-#define PCIE_LC_CNTL4 0xb6 /* PCIE_P */
-# define LC_REDO_EQ (1 << 5)
-# define LC_SET_QUIESCE (1 << 13)
-
-/*
- * UVD
- */
-#define UVD_UDEC_ADDR_CONFIG 0x3bd3
-#define UVD_UDEC_DB_ADDR_CONFIG 0x3bd4
-#define UVD_UDEC_DBW_ADDR_CONFIG 0x3bd5
-#define UVD_RBC_RB_RPTR 0x3da4
-#define UVD_RBC_RB_WPTR 0x3da5
-#define UVD_STATUS 0x3daf
-
-#define UVD_CGC_CTRL 0x3dc2
-# define DCM (1 << 0)
-# define CG_DT(x) ((x) << 2)
-# define CG_DT_MASK (0xf << 2)
-# define CLK_OD(x) ((x) << 6)
-# define CLK_OD_MASK (0x1f << 6)
-
- /* UVD CTX indirect */
-#define UVD_CGC_MEM_CTRL 0xC0
-#define UVD_CGC_CTRL2 0xC1
-# define DYN_OR_EN (1 << 0)
-# define DYN_RR_EN (1 << 1)
-# define G_DIV_ID(x) ((x) << 2)
-# define G_DIV_ID_MASK (0x7 << 2)
/*
* PM4
@@ -1874,45 +552,7 @@
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x200 /* not a register */
-
-#define DMA_RB_CNTL 0x3400
-# define DMA_RB_ENABLE (1 << 0)
-# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
-# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
-# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define DMA_RB_BASE 0x3401
-#define DMA_RB_RPTR 0x3402
-#define DMA_RB_WPTR 0x3403
-
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-
-#define DMA_IB_CNTL 0x3409
-# define DMA_IB_ENABLE (1 << 0)
-# define DMA_IB_SWAP_ENABLE (1 << 4)
-# define CMD_VMID_FORCE (1 << 31)
-#define DMA_IB_RPTR 0x340a
-#define DMA_CNTL 0x340b
-# define TRAP_ENABLE (1 << 0)
-# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
-# define SEM_WAIT_INT_ENABLE (1 << 2)
-# define DATA_SWAP_ENABLE (1 << 3)
-# define FENCE_SWAP_ENABLE (1 << 4)
-# define CTXEMPTY_INT_ENABLE (1 << 28)
-#define DMA_STATUS_REG 0x340d
-# define DMA_IDLE (1 << 0)
-#define DMA_TILING_CONFIG 0x342e
-
-#define DMA_POWER_CNTL 0x342f
-# define MEM_POWER_OVERRIDE (1 << 8)
-#define DMA_CLK_CTRL 0x3430
-
-#define DMA_PG 0x3435
-# define PG_CNTL_ENABLE (1 << 0)
-#define DMA_PGFSM_CONFIG 0x3436
-#define DMA_PGFSM_WRITE 0x3437
+#define SDMA_MAX_INSTANCE 2
#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
(((b) & 0x1) << 26) | \
@@ -1941,6 +581,7 @@
#define DMA_PACKET_POLL_REG_MEM 0xe
#define DMA_PACKET_NOP 0xf
+/* VCE */
#define VCE_STATUS 0x20004
#define VCE_VCPU_CNTL 0x20014
#define VCE_CLK_EN (1 << 0)
@@ -1991,431 +632,144 @@
//#dce stupp
/* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */
-#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4
-#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4
-#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4
-#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4
-#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4
-#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4
+#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4
+#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4
+#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4
+#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4
+#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4
+#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4
+
+/* hpd instance offsets */
+#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807)
+#define HPD1_REGISTER_OFFSET (0x180a - 0x1807)
+#define HPD2_REGISTER_OFFSET (0x180d - 0x1807)
+#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807)
+#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807)
+#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807)
+
+/* audio endpt instance offsets */
+#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780)
+#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780)
+#define AUD2_REGISTER_OFFSET (0x178c - 0x1780)
+#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780)
+#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780)
+#define AUD5_REGISTER_OFFSET (0x179d - 0x1780)
+#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780)
#define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64
-#define AMDGPU_MM_INDEX 0x0000
-#define AMDGPU_MM_DATA 0x0001
-
-#define VERDE_NUM_CRTC 6
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-#define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-#define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-#define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define EVERGREEN_DATA_FORMAT 0x1ac0
-# define EVERGREEN_INTERLEAVE_EN (1 << 0)
-
-#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000
-#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc
-
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20)
-#define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20)
-#define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20)
-#define R600_D1GRPH_ARRAY_MODE_2D_TILED_THIN1 (4 << 20)
-
-#define R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a45
-#define R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1845
-
-#define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847
-#define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8
-
-#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4
-#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4
-
-#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000
-#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000
-
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1
-#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100
-
-#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1
-
-#define R600_D1GRPH_SWAP_CONTROL 0x1843
-#define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0)
-#define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
-
-#define AVIVO_D1VGA_CONTROL 0x00cc
-# define AVIVO_DVGA_CONTROL_MODE_ENABLE (1 << 0)
-# define AVIVO_DVGA_CONTROL_TIMING_SELECT (1 << 8)
-# define AVIVO_DVGA_CONTROL_SYNC_POLARITY_SELECT (1 << 9)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_TIMING_SELECT (1 << 10)
-# define AVIVO_DVGA_CONTROL_OVERSCAN_COLOR_EN (1 << 16)
-# define AVIVO_DVGA_CONTROL_ROTATE (1 << 24)
-#define AVIVO_D2VGA_CONTROL 0x00ce
-
-#define R600_BUS_CNTL 0x1508
-# define R600_BIOS_ROM_DIS (1 << 1)
+
#define R600_ROM_CNTL 0x580
# define R600_SCK_OVERWRITE (1 << 1)
# define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28
# define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28)
-#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1
-
-#define FMT_BIT_DEPTH_CONTROL 0x1bf2
-#define FMT_TRUNCATE_EN (1 << 0)
-#define FMT_TRUNCATE_DEPTH (1 << 4)
-#define FMT_SPATIAL_DITHER_EN (1 << 8)
-#define FMT_SPATIAL_DITHER_MODE(x) ((x) << 9)
-#define FMT_SPATIAL_DITHER_DEPTH (1 << 12)
-#define FMT_FRAME_RANDOM_ENABLE (1 << 13)
-#define FMT_RGB_RANDOM_ENABLE (1 << 14)
-#define FMT_HIGHPASS_RANDOM_ENABLE (1 << 15)
-#define FMT_TEMPORAL_DITHER_EN (1 << 16)
-#define FMT_TEMPORAL_DITHER_DEPTH (1 << 20)
-#define FMT_TEMPORAL_DITHER_OFFSET(x) ((x) << 21)
-#define FMT_TEMPORAL_LEVEL (1 << 24)
-#define FMT_TEMPORAL_DITHER_RESET (1 << 25)
-#define FMT_25FRC_SEL(x) ((x) << 26)
-#define FMT_50FRC_SEL(x) ((x) << 28)
-#define FMT_75FRC_SEL(x) ((x) << 30)
-
-#define EVERGREEN_DC_LUT_CONTROL 0x1a80
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_BLUE 0x1a81
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_GREEN 0x1a82
-#define EVERGREEN_DC_LUT_BLACK_OFFSET_RED 0x1a83
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_BLUE 0x1a84
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_GREEN 0x1a85
-#define EVERGREEN_DC_LUT_WHITE_OFFSET_RED 0x1a86
-#define EVERGREEN_DC_LUT_30_COLOR 0x1a7c
-#define EVERGREEN_DC_LUT_RW_INDEX 0x1a79
-#define EVERGREEN_DC_LUT_WRITE_EN_MASK 0x1a7e
-#define EVERGREEN_DC_LUT_RW_MODE 0x1a78
-
-#define EVERGREEN_GRPH_ENABLE 0x1a00
-#define EVERGREEN_GRPH_CONTROL 0x1a01
-#define EVERGREEN_GRPH_DEPTH(x) (((x) & 0x3) << 0)
-#define EVERGREEN_GRPH_DEPTH_8BPP 0
-#define EVERGREEN_GRPH_DEPTH_16BPP 1
-#define EVERGREEN_GRPH_DEPTH_32BPP 2
-#define EVERGREEN_GRPH_NUM_BANKS(x) (((x) & 0x3) << 2)
-#define EVERGREEN_ADDR_SURF_2_BANK 0
-#define EVERGREEN_ADDR_SURF_4_BANK 1
-#define EVERGREEN_ADDR_SURF_8_BANK 2
-#define EVERGREEN_ADDR_SURF_16_BANK 3
-#define EVERGREEN_GRPH_Z(x) (((x) & 0x3) << 4)
-#define EVERGREEN_GRPH_BANK_WIDTH(x) (((x) & 0x3) << 6)
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_1 0
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_2 1
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_4 2
-#define EVERGREEN_ADDR_SURF_BANK_WIDTH_8 3
-#define EVERGREEN_GRPH_FORMAT(x) (((x) & 0x7) << 8)
-
-#define EVERGREEN_GRPH_FORMAT_INDEXED 0
-#define EVERGREEN_GRPH_FORMAT_ARGB1555 0
-#define EVERGREEN_GRPH_FORMAT_ARGB565 1
-#define EVERGREEN_GRPH_FORMAT_ARGB4444 2
-#define EVERGREEN_GRPH_FORMAT_AI88 3
-#define EVERGREEN_GRPH_FORMAT_MONO16 4
-#define EVERGREEN_GRPH_FORMAT_BGRA5551 5
+#define GRPH_ARRAY_LINEAR_GENERAL 0
+#define GRPH_ARRAY_LINEAR_ALIGNED 1
+#define GRPH_ARRAY_1D_TILED_THIN1 2
+#define GRPH_ARRAY_2D_TILED_THIN1 4
+
+#define ES_AND_GS_AUTO 3
+#define BUF_SWAP_32BIT (2 << 16)
+
+#define GRPH_DEPTH_8BPP 0
+#define GRPH_DEPTH_16BPP 1
+#define GRPH_DEPTH_32BPP 2
+
+/* 8 BPP */
+#define GRPH_FORMAT_INDEXED 0
+
+/* 16 BPP */
+#define GRPH_FORMAT_ARGB1555 0
+#define GRPH_FORMAT_ARGB565 1
+#define GRPH_FORMAT_ARGB4444 2
+#define GRPH_FORMAT_AI88 3
+#define GRPH_FORMAT_MONO16 4
+#define GRPH_FORMAT_BGRA5551 5
/* 32 BPP */
-#define EVERGREEN_GRPH_FORMAT_ARGB8888 0
-#define EVERGREEN_GRPH_FORMAT_ARGB2101010 1
-#define EVERGREEN_GRPH_FORMAT_32BPP_DIG 2
-#define EVERGREEN_GRPH_FORMAT_8B_ARGB2101010 3
-#define EVERGREEN_GRPH_FORMAT_BGRA1010102 4
-#define EVERGREEN_GRPH_FORMAT_8B_BGRA1010102 5
-#define EVERGREEN_GRPH_FORMAT_RGB111110 6
-#define EVERGREEN_GRPH_FORMAT_BGR101111 7
-#define EVERGREEN_GRPH_BANK_HEIGHT(x) (((x) & 0x3) << 11)
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_1 0
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_2 1
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_4 2
-#define EVERGREEN_ADDR_SURF_BANK_HEIGHT_8 3
-#define EVERGREEN_GRPH_TILE_SPLIT(x) (((x) & 0x7) << 13)
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_64B 0
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_128B 1
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_256B 2
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_512B 3
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_1KB 4
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_2KB 5
-#define EVERGREEN_ADDR_SURF_TILE_SPLIT_4KB 6
-#define EVERGREEN_GRPH_MACRO_TILE_ASPECT(x) (((x) & 0x3) << 18)
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-#define EVERGREEN_GRPH_ARRAY_MODE(x) (((x) & 0x7) << 20)
-#define EVERGREEN_GRPH_ARRAY_LINEAR_GENERAL 0
-#define EVERGREEN_GRPH_ARRAY_LINEAR_ALIGNED 1
-#define EVERGREEN_GRPH_ARRAY_1D_TILED_THIN1 2
-#define EVERGREEN_GRPH_ARRAY_2D_TILED_THIN1 4
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_1 0
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_2 1
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_4 2
-#define EVERGREEN_ADDR_SURF_MACRO_TILE_ASPECT_8 3
-
-#define EVERGREEN_GRPH_SWAP_CONTROL 0x1a03
-#define EVERGREEN_GRPH_ENDIAN_SWAP(x) (((x) & 0x3) << 0)
-# define EVERGREEN_GRPH_ENDIAN_NONE 0
-# define EVERGREEN_GRPH_ENDIAN_8IN16 1
-# define EVERGREEN_GRPH_ENDIAN_8IN32 2
-# define EVERGREEN_GRPH_ENDIAN_8IN64 3
-#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
-# define EVERGREEN_GRPH_RED_SEL_R 0
-# define EVERGREEN_GRPH_RED_SEL_G 1
-# define EVERGREEN_GRPH_RED_SEL_B 2
-# define EVERGREEN_GRPH_RED_SEL_A 3
-#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
-# define EVERGREEN_GRPH_GREEN_SEL_G 0
-# define EVERGREEN_GRPH_GREEN_SEL_B 1
-# define EVERGREEN_GRPH_GREEN_SEL_A 2
-# define EVERGREEN_GRPH_GREEN_SEL_R 3
-#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
-# define EVERGREEN_GRPH_BLUE_SEL_B 0
-# define EVERGREEN_GRPH_BLUE_SEL_A 1
-# define EVERGREEN_GRPH_BLUE_SEL_R 2
-# define EVERGREEN_GRPH_BLUE_SEL_G 3
-#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
-# define EVERGREEN_GRPH_ALPHA_SEL_A 0
-# define EVERGREEN_GRPH_ALPHA_SEL_R 1
-# define EVERGREEN_GRPH_ALPHA_SEL_G 2
-# define EVERGREEN_GRPH_ALPHA_SEL_B 3
-
-#define EVERGREEN_D3VGA_CONTROL 0xf8
-#define EVERGREEN_D4VGA_CONTROL 0xf9
-#define EVERGREEN_D5VGA_CONTROL 0xfa
-#define EVERGREEN_D6VGA_CONTROL 0xfb
-
-#define EVERGREEN_GRPH_SURFACE_ADDRESS_MASK 0xffffff00
-
-#define EVERGREEN_GRPH_LUT_10BIT_BYPASS_CONTROL 0x1a02
-#define EVERGREEN_LUT_10BIT_BYPASS_EN (1 << 8)
-
-#define EVERGREEN_GRPH_PITCH 0x1a06
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_SURFACE_OFFSET_X 0x1a09
-#define EVERGREEN_GRPH_SURFACE_OFFSET_Y 0x1a0a
-#define EVERGREEN_GRPH_X_START 0x1a0b
-#define EVERGREEN_GRPH_Y_START 0x1a0c
-#define EVERGREEN_GRPH_X_END 0x1a0d
-#define EVERGREEN_GRPH_Y_END 0x1a0e
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_FLIP_CONTROL 0x1a12
-#define EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN (1 << 0)
-
-#define EVERGREEN_VIEWPORT_START 0x1b5c
-#define EVERGREEN_VIEWPORT_SIZE 0x1b5d
-#define EVERGREEN_DESKTOP_HEIGHT 0x1ac1
-
-/* CUR blocks at 0x6998, 0x7598, 0x10198, 0x10d98, 0x11998, 0x12598 */
-#define EVERGREEN_CUR_CONTROL 0x1a66
-# define EVERGREEN_CURSOR_EN (1 << 0)
-# define EVERGREEN_CURSOR_MODE(x) (((x) & 0x3) << 8)
-# define EVERGREEN_CURSOR_MONO 0
-# define EVERGREEN_CURSOR_24_1 1
-# define EVERGREEN_CURSOR_24_8_PRE_MULT 2
-# define EVERGREEN_CURSOR_24_8_UNPRE_MULT 3
-# define EVERGREEN_CURSOR_2X_MAGNIFY (1 << 16)
-# define EVERGREEN_CURSOR_FORCE_MC_ON (1 << 20)
-# define EVERGREEN_CURSOR_URGENT_CONTROL(x) (((x) & 0x7) << 24)
-# define EVERGREEN_CURSOR_URGENT_ALWAYS 0
-# define EVERGREEN_CURSOR_URGENT_1_8 1
-# define EVERGREEN_CURSOR_URGENT_1_4 2
-# define EVERGREEN_CURSOR_URGENT_3_8 3
-# define EVERGREEN_CURSOR_URGENT_1_2 4
-#define EVERGREEN_CUR_SURFACE_ADDRESS 0x1a67
-# define EVERGREEN_CUR_SURFACE_ADDRESS_MASK 0xfffff000
-#define EVERGREEN_CUR_SIZE 0x1a68
-#define EVERGREEN_CUR_SURFACE_ADDRESS_HIGH 0x1a69
-#define EVERGREEN_CUR_POSITION 0x1a6a
-#define EVERGREEN_CUR_HOT_SPOT 0x1a6b
-#define EVERGREEN_CUR_COLOR1 0x1a6c
-#define EVERGREEN_CUR_COLOR2 0x1a6d
-#define EVERGREEN_CUR_UPDATE 0x1a6e
-# define EVERGREEN_CURSOR_UPDATE_PENDING (1 << 0)
-# define EVERGREEN_CURSOR_UPDATE_TAKEN (1 << 1)
-# define EVERGREEN_CURSOR_UPDATE_LOCK (1 << 16)
-# define EVERGREEN_CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24)
-
-
-#define NI_INPUT_CSC_CONTROL 0x1a35
-# define NI_INPUT_CSC_GRPH_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_CSC_BYPASS 0
-# define NI_INPUT_CSC_PROG_COEFF 1
-# define NI_INPUT_CSC_PROG_SHARED_MATRIXA 2
-# define NI_INPUT_CSC_OVL_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_OUTPUT_CSC_CONTROL 0x1a3c
-# define NI_OUTPUT_CSC_GRPH_MODE(x) (((x) & 0x7) << 0)
-# define NI_OUTPUT_CSC_BYPASS 0
-# define NI_OUTPUT_CSC_TV_RGB 1
-# define NI_OUTPUT_CSC_YCBCR_601 2
-# define NI_OUTPUT_CSC_YCBCR_709 3
-# define NI_OUTPUT_CSC_PROG_COEFF 4
-# define NI_OUTPUT_CSC_PROG_SHARED_MATRIXB 5
-# define NI_OUTPUT_CSC_OVL_MODE(x) (((x) & 0x7) << 4)
-
-#define NI_DEGAMMA_CONTROL 0x1a58
-# define NI_GRPH_DEGAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_DEGAMMA_BYPASS 0
-# define NI_DEGAMMA_SRGB_24 1
-# define NI_DEGAMMA_XVYCC_222 2
-# define NI_OVL_DEGAMMA_MODE(x) (((x) & 0x3) << 4)
-# define NI_ICON_DEGAMMA_MODE(x) (((x) & 0x3) << 8)
-# define NI_CURSOR_DEGAMMA_MODE(x) (((x) & 0x3) << 12)
-
-#define NI_GAMUT_REMAP_CONTROL 0x1a59
-# define NI_GRPH_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 0)
-# define NI_GAMUT_REMAP_BYPASS 0
-# define NI_GAMUT_REMAP_PROG_COEFF 1
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXA 2
-# define NI_GAMUT_REMAP_PROG_SHARED_MATRIXB 3
-# define NI_OVL_GAMUT_REMAP_MODE(x) (((x) & 0x3) << 4)
-
-#define NI_REGAMMA_CONTROL 0x1aa0
-# define NI_GRPH_REGAMMA_MODE(x) (((x) & 0x7) << 0)
-# define NI_REGAMMA_BYPASS 0
-# define NI_REGAMMA_SRGB_24 1
-# define NI_REGAMMA_XVYCC_222 2
-# define NI_REGAMMA_PROG_A 3
-# define NI_REGAMMA_PROG_B 4
-# define NI_OVL_REGAMMA_MODE(x) (((x) & 0x7) << 4)
-
-
-#define NI_PRESCALE_GRPH_CONTROL 0x1a2d
-# define NI_GRPH_PRESCALE_BYPASS (1 << 4)
-
-#define NI_PRESCALE_OVL_CONTROL 0x1a31
-# define NI_OVL_PRESCALE_BYPASS (1 << 4)
-
-#define NI_INPUT_GAMMA_CONTROL 0x1a10
-# define NI_GRPH_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 0)
-# define NI_INPUT_GAMMA_USE_LUT 0
-# define NI_INPUT_GAMMA_BYPASS 1
-# define NI_INPUT_GAMMA_SRGB_24 2
-# define NI_INPUT_GAMMA_XVYCC_222 3
-# define NI_OVL_INPUT_GAMMA_MODE(x) (((x) & 0x3) << 4)
-
-#define BLACKOUT_MODE_MASK 0x00000007
-#define VGA_RENDER_CONTROL 0xC0
-#define R_000300_VGA_RENDER_CONTROL 0xC0
-#define C_000300_VGA_VSTATUS_CNTL 0xFFFCFFFF
-#define EVERGREEN_CRTC_STATUS 0x1BA3
-#define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_POSITION 0x1BA4
-/* CRTC blocks at 0x6df0, 0x79f0, 0x105f0, 0x111f0, 0x11df0, 0x129f0 */
-#define EVERGREEN_CRTC_V_BLANK_START_END 0x1b8d
-#define EVERGREEN_CRTC_CONTROL 0x1b9c
-# define EVERGREEN_CRTC_MASTER_EN (1 << 0)
-# define EVERGREEN_CRTC_DISP_READ_REQUEST_DISABLE (1 << 24)
-#define EVERGREEN_CRTC_BLANK_CONTROL 0x1b9d
-# define EVERGREEN_CRTC_BLANK_DATA_EN (1 << 8)
-# define EVERGREEN_CRTC_V_BLANK (1 << 0)
-#define EVERGREEN_CRTC_STATUS_HV_COUNT 0x1ba8
-#define EVERGREEN_CRTC_UPDATE_LOCK 0x1bb5
-#define EVERGREEN_MASTER_UPDATE_LOCK 0x1bbd
-#define EVERGREEN_MASTER_UPDATE_MODE 0x1bbe
-#define EVERGREEN_GRPH_UPDATE_LOCK (1 << 16)
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x1a07
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a08
-#define EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS 0x1a04
-#define EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS 0x1a05
-#define EVERGREEN_GRPH_UPDATE 0x1a11
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS 0xc4
-#define EVERGREEN_VGA_MEMORY_BASE_ADDRESS_HIGH 0xc9
-#define EVERGREEN_GRPH_SURFACE_UPDATE_PENDING (1 << 2)
-
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10
-#define mmVM_CONTEXT1_CNTL__xxRANGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x4
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80
-#define mmVM_CONTEXT1_CNTL__xxDUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x7
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x400
-#define mmVM_CONTEXT1_CNTL__xxPDE0_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xa
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x2000
-#define mmVM_CONTEXT1_CNTL__xxVALID_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0xd
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x10000
-#define mmVM_CONTEXT1_CNTL__xxREAD_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x10
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT_MASK 0x80000
-#define mmVM_CONTEXT1_CNTL__xxWRITE_PROTECTION_FAULT_ENABLE_DEFAULT__SHIFT 0x13
-
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID_MASK 0x1e000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxVMID__SHIFT 0x19
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS_MASK 0xff
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxPROTECTIONS__SHIFT 0x0
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID_MASK 0xff000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_ID__SHIFT 0xc
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW_MASK 0x1000000
-#define mmVM_CONTEXT1_PROTECTION_FAULT_STATUS__xxMEMORY_CLIENT_RW__SHIFT 0x18
-
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE_MASK 0x7
-#define mmMC_SHARED_BLACKOUT_CNTL__xxBLACKOUT_MODE__SHIFT 0x0
-
-#define mmBIF_FB_EN__xxFB_READ_EN_MASK 0x1
-#define mmBIF_FB_EN__xxFB_READ_EN__SHIFT 0x0
-#define mmBIF_FB_EN__xxFB_WRITE_EN_MASK 0x2
-#define mmBIF_FB_EN__xxFB_WRITE_EN__SHIFT 0x1
-
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC_MASK 0x20000
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_VMC__SHIFT 0x11
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800
-#define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb
-
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8
-#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40
-#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200
-#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000
-#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000
-#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000
-#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12
+#define GRPH_FORMAT_ARGB8888 0
+#define GRPH_FORMAT_ARGB2101010 1
+#define GRPH_FORMAT_32BPP_DIG 2
+#define GRPH_FORMAT_8B_ARGB2101010 3
+#define GRPH_FORMAT_BGRA1010102 4
+#define GRPH_FORMAT_8B_BGRA1010102 5
+#define GRPH_FORMAT_RGB111110 6
+#define GRPH_FORMAT_BGR101111 7
+
+#define GRPH_ENDIAN_NONE 0
+#define GRPH_ENDIAN_8IN16 1
+#define GRPH_ENDIAN_8IN32 2
+#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
+
+/* CUR_CONTROL */
+ #define CURSOR_MONO 0
+ #define CURSOR_24_1 1
+ #define CURSOR_24_8_PRE_MULT 2
+ #define CURSOR_24_8_UNPRE_MULT 3
+ #define CURSOR_URGENT_ALWAYS 0
+ #define CURSOR_URGENT_1_8 1
+ #define CURSOR_URGENT_1_4 2
+ #define CURSOR_URGENT_3_8 3
+ #define CURSOR_URGENT_1_2 4
+
+/* INPUT_CSC_CONTROL */
+# define INPUT_CSC_BYPASS 0
+# define INPUT_CSC_PROG_COEFF 1
+# define INPUT_CSC_PROG_SHARED_MATRIXA 2
+
+/* OUTPUT_CSC_CONTROL */
+# define OUTPUT_CSC_BYPASS 0
+# define OUTPUT_CSC_TV_RGB 1
+# define OUTPUT_CSC_YCBCR_601 2
+# define OUTPUT_CSC_YCBCR_709 3
+# define OUTPUT_CSC_PROG_COEFF 4
+# define OUTPUT_CSC_PROG_SHARED_MATRIXB 5
+
+/* DEGAMMA_CONTROL */
+# define DEGAMMA_BYPASS 0
+# define DEGAMMA_SRGB_24 1
+# define DEGAMMA_XVYCC_222 2
+
+/* GAMUT_REMAP_CONTROL */
+# define GAMUT_REMAP_BYPASS 0
+# define GAMUT_REMAP_PROG_COEFF 1
+# define GAMUT_REMAP_PROG_SHARED_MATRIXA 2
+# define GAMUT_REMAP_PROG_SHARED_MATRIXB 3
+
+/* REGAMMA_CONTROL */
+# define REGAMMA_BYPASS 0
+# define REGAMMA_SRGB_24 1
+# define REGAMMA_XVYCC_222 2
+# define REGAMMA_PROG_A 3
+# define REGAMMA_PROG_B 4
+
+
+/* INPUT_GAMMA_CONTROL */
+# define INPUT_GAMMA_USE_LUT 0
+# define INPUT_GAMMA_BYPASS 1
+# define INPUT_GAMMA_SRGB_24 2
+# define INPUT_GAMMA_XVYCC_222 3
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -2426,28 +780,14 @@
#define MC_SEQ_MISC0__MT__HBM 0x60000000
#define MC_SEQ_MISC0__MT__DDR3 0xB0000000
-#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000
#define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000
-#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000
-#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000
#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
-#define CONFIG_CNTL 0x1509
-#define CC_DRM_ID_STRAPS 0X1559
#define AMDGPU_PCIE_INDEX 0xc
#define AMDGPU_PCIE_DATA 0xd
-#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0x3411
-#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0x3412
-#define DMA_MODE 0x342f
-#define DMA_RB_RPTR_ADDR_HI 0x3407
-#define DMA_RB_RPTR_ADDR_LO 0x3408
-#define DMA_BUSY_MASK 0x20
-#define DMA1_BUSY_MASK 0X40
-#define SDMA_MAX_INSTANCE 2
-
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
#define PCIE_PORT_INDEX 0xe
@@ -2457,8 +797,6 @@
#define EVERGREEN_PIF_PHY1_INDEX 0x10
#define EVERGREEN_PIF_PHY1_DATA 0x14
-#define MC_VM_FB_OFFSET 0x81a
-
/* Discrete VCE clocks */
#define CG_VCEPLL_FUNC_CNTL 0xc0030600
#define VCEPLL_RESET_MASK 0x00000001
diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
index 481217c32d85..2594467bdd87 100644
--- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
+++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
@@ -81,15 +81,9 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -175,15 +169,9 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
}
@@ -193,15 +181,9 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -213,7 +195,7 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -238,6 +220,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
int r;
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = sienna_cichlid_mode2_restore_ip(tmp_adev);
@@ -255,6 +238,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
index 0af648931df5..70569ea906bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -80,15 +80,9 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_MES))
continue;
- r = adev->ip_blocks[i].version->funcs->suspend(adev);
-
- if (r) {
- dev_err(adev->dev,
- "suspend of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
- adev->ip_blocks[i].status.hw = false;
}
return 0;
@@ -186,15 +180,9 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].version->type ==
AMD_IP_BLOCK_TYPE_SDMA))
continue;
- r = adev->ip_blocks[i].version->funcs->resume(adev);
- if (r) {
- dev_err(adev->dev,
- "resume of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
+ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
+ if (r)
return r;
- }
-
- adev->ip_blocks[i].status.hw = true;
}
for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -208,7 +196,7 @@ static int smu_v13_0_10_mode2_restore_ip(struct amdgpu_device *adev)
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init(
- (void *)adev);
+ &adev->ip_blocks[i]);
if (r) {
dev_err(adev->dev,
"late_init of IP block <%s> failed %d after reset\n",
@@ -233,6 +221,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
int r;
struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
@@ -246,6 +235,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
+ amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8d16dacdc172..c457be3a3c56 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -28,7 +28,6 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@@ -90,8 +89,8 @@ static const struct amd_ip_funcs soc15_common_ip_funcs;
/* Vega, Raven, Arcturus */
static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_encode =
@@ -103,12 +102,11 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_decode =
@@ -120,12 +118,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
};
@@ -138,10 +136,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -171,6 +169,24 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = {
.codec_array = NULL,
};
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = {
+ .codec_count = 0,
+ .codec_array = NULL,
+};
+
+static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = {
+ .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0),
+ .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0,
+};
+
static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
@@ -209,6 +225,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
else
*codecs = &vcn_4_0_3_video_codecs_decode;
return 0;
+ case IP_VERSION(5, 0, 1):
+ if (encode)
+ *codecs = &vcn_5_0_1_video_codecs_encode_vcn0;
+ else
+ *codecs = &vcn_5_0_1_video_codecs_decode_vcn0;
+ return 0;
default:
return -EINVAL;
}
@@ -327,6 +349,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) ||
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14))
return 10000;
if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) ||
@@ -556,10 +579,13 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
+ case IP_VERSION(13, 0, 12):
/* Use gpu_recovery param to target a reset method.
* Enable triggering of GPU reset only if specified
* by module parameter.
*/
+ if (adev->pcie_reset_ctx.in_link_reset)
+ return AMD_RESET_METHOD_LINK;
if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
return AMD_RESET_METHOD_MODE2;
else if (!(adev->flags & AMD_IS_APU))
@@ -578,20 +604,14 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
static bool soc15_need_reset_on_resume(struct amdgpu_device *adev)
{
- u32 sol_reg;
-
- sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-
/* Will reset for the following suspend abort cases.
- * 1) Only reset limit on APU side, dGPU hasn't checked yet.
- * 2) S3 suspend abort and TOS already launched.
+ * 1) S3 suspend aborted in the normal S3 suspend
+ * 2) S3 suspend aborted in performing pm core test.
*/
- if (adev->flags & AMD_IS_APU && adev->in_s3 &&
- !adev->suspend_complete &&
- sol_reg)
+ if (adev->in_s3 && !pm_resume_via_firmware())
return true;
-
- return false;
+ else
+ return false;
}
static int soc15_asic_reset(struct amdgpu_device *adev)
@@ -601,11 +621,17 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
* successfully. So now, temporarily enable it for the
* S3 suspend abort case.
*/
- if (((adev->apu_flags & AMD_APU_IS_RAVEN) ||
- (adev->apu_flags & AMD_APU_IS_RAVEN2)) &&
- !soc15_need_reset_on_resume(adev))
+
+ if ((adev->apu_flags & AMD_APU_IS_PICASSO ||
+ !(adev->apu_flags & AMD_APU_IS_RAVEN)) &&
+ soc15_need_reset_on_resume(adev))
+ goto asic_reset;
+
+ if ((adev->apu_flags & AMD_APU_IS_RAVEN) ||
+ (adev->apu_flags & AMD_APU_IS_RAVEN2))
return 0;
+asic_reset:
switch (soc15_asic_reset_method(adev)) {
case AMD_RESET_METHOD_PCI:
dev_info(adev->dev, "PCI reset\n");
@@ -616,6 +642,9 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
case AMD_RESET_METHOD_MODE2:
dev_info(adev->dev, "MODE2 reset\n");
return amdgpu_dpm_mode2_reset(adev);
+ case AMD_RESET_METHOD_LINK:
+ dev_info(adev->dev, "Link reset\n");
+ return amdgpu_device_link_reset(adev);
default:
dev_info(adev->dev, "MODE1 reset\n");
return amdgpu_device_mode1_reset(adev);
@@ -829,6 +858,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_RENOIR)
return true;
+ if (amdgpu_gmc_need_reset_on_init(adev))
+ return true;
+ if (amdgpu_psp_tos_reload_needed(adev))
+ return true;
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
@@ -929,9 +962,9 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
.get_reg_state = &aqua_vanjaram_get_reg_state,
};
-static int soc15_common_early_init(void *handle)
+static int soc15_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
@@ -1171,6 +1204,7 @@ static int soc15_common_early_init(void *handle)
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
+ case IP_VERSION(9, 5, 0):
adev->asic_funcs = &aqua_vanjaram_asic_funcs;
adev->cg_flags =
AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG |
@@ -1198,9 +1232,9 @@ static int soc15_common_early_init(void *handle)
return 0;
}
-static int soc15_common_late_init(void *handle)
+static int soc15_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
@@ -1213,9 +1247,9 @@ static int soc15_common_late_init(void *handle)
return 0;
}
-static int soc15_common_sw_init(void *handle)
+static int soc15_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
@@ -1227,9 +1261,9 @@ static int soc15_common_sw_init(void *handle)
return 0;
}
-static int soc15_common_sw_fini(void *handle)
+static int soc15_common_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->df.funcs &&
adev->df.funcs->sw_fini)
@@ -1251,9 +1285,9 @@ static void soc15_sdma_doorbell_range_init(struct amdgpu_device *adev)
}
}
-static int soc15_common_hw_init(void *handle)
+static int soc15_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable aspm */
soc15_program_aspm(adev);
@@ -1280,9 +1314,9 @@ static int soc15_common_hw_init(void *handle)
return 0;
}
-static int soc15_common_hw_fini(void *handle)
+static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc15_enable_doorbell_aperture
@@ -1295,7 +1329,12 @@ static int soc15_common_hw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_put_irq(adev);
+ /*
+ * For minimal init, late_init is not called, hence RAS irqs are not
+ * enabled.
+ */
if ((!amdgpu_sriov_vf(adev)) &&
+ (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
adev->nbio.ras_if &&
amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
if (adev->nbio.ras &&
@@ -1309,39 +1348,27 @@ static int soc15_common_hw_fini(void *handle)
return 0;
}
-static int soc15_common_suspend(void *handle)
+static int soc15_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc15_common_hw_fini(adev);
+ return soc15_common_hw_fini(ip_block);
}
-static int soc15_common_resume(void *handle)
+static int soc15_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (soc15_need_reset_on_resume(adev)) {
dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n");
soc15_asic_reset(adev);
}
- return soc15_common_hw_init(adev);
+ return soc15_common_hw_init(ip_block);
}
-static bool soc15_common_is_idle(void *handle)
+static bool soc15_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc15_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc15_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
{
uint32_t def, data;
@@ -1386,10 +1413,10 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable
WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);
}
-static int soc15_common_set_clockgating_state(void *handle,
+static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -1438,9 +1465,9 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
}
-static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
+static void soc15_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -1454,6 +1481,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) &&
(amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) &&
+ (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) &&
(amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) {
/* AMD_CG_SUPPORT_DRM_MGCG */
data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0));
@@ -1474,7 +1502,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
adev->df.funcs->get_clockgating_state(adev, flags);
}
-static int soc15_common_set_powergating_state(void *handle,
+static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* todo */
@@ -1492,11 +1520,7 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = {
.suspend = soc15_common_suspend,
.resume = soc15_common_resume,
.is_idle = soc15_common_is_idle,
- .wait_for_idle = soc15_common_wait_for_idle,
- .soft_reset = soc15_common_soft_reset,
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
.get_clockgating_state= soc15_common_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index b9cbeb389edc..cf93fa477674 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -93,11 +93,25 @@
#define PACKET3_DISPATCH_INDIRECT 0x16
#define PACKET3_ATOMIC_GDS 0x1D
#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
#define PACKET3_OCCLUSION_QUERY 0x1F
#define PACKET3_SET_PREDICATION 0x20
#define PACKET3_REG_RMW 0x21
#define PACKET3_COND_EXEC 0x22
#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
#define PACKET3_DRAW_INDIRECT 0x24
#define PACKET3_DRAW_INDEX_INDIRECT 0x25
#define PACKET3_INDEX_BASE 0x26
@@ -132,6 +146,28 @@
* 1 - pfp
* 2 - ce
*/
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PACKET3_MEM_SEMAPHORE 0x39
# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -160,6 +196,33 @@
/* 0 - me
* 1 - pfp
*/
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
#define PACKET3_INDIRECT_BUFFER 0x3F
#define INDIRECT_BUFFER_VALID (1 << 23)
#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -169,7 +232,63 @@
*/
#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
#define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
#define PACKET3_PFP_SYNC_ME 0x42
#define PACKET3_COND_WRITE 0x45
#define PACKET3_EVENT_WRITE 0x46
@@ -181,6 +300,15 @@
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
*/
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
#define PACKET3_RELEASE_MEM 0x49
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
@@ -286,6 +414,13 @@
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
#define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
#define PACKET3_REWIND 0x59
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
#define PACKET3_LOAD_UCONFIG_REG 0x5E
#define PACKET3_LOAD_SH_REG 0x5F
#define PACKET3_LOAD_CONFIG_REG 0x60
@@ -300,12 +435,16 @@
#define PACKET3_SET_SH_REG 0x76
#define PACKET3_SET_SH_REG_START 0x00002c00
#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
#define PACKET3_SET_SH_REG_OFFSET 0x77
#define PACKET3_SET_QUEUE_REG 0x78
#define PACKET3_SET_UCONFIG_REG 0x79
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PACKET3_SCRATCH_RAM_WRITE 0x7D
#define PACKET3_SCRATCH_RAM_READ 0x7E
#define PACKET3_LOAD_CONST_RAM 0x80
@@ -413,6 +552,11 @@
# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_CLEANER_SHADER_9_0 0xD7
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
#define PACKET3_RUN_CLEANER_SHADER 0xD2
/* 1. header
* 2. RESERVED [31:0]
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index d30ad7d56def..ad36c96478a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -49,13 +49,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs;
/* SOC21 */
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
@@ -96,14 +96,14 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = {
/* SRIOV SOC21, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
};
static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
@@ -117,23 +117,17 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -390,6 +384,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
case IP_VERSION(14, 0, 4):
+ case IP_VERSION(14, 0, 5):
return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
@@ -556,9 +551,9 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = {
.update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
-static int soc21_common_early_init(void *handle)
+static int soc21_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
@@ -781,6 +776,34 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_GFX_PG;
adev->external_rev_id = adev->rev_id + 0x40;
break;
+ case IP_VERSION(11, 5, 3):
+ adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_JPEG_DPG |
+ AMD_PG_SUPPORT_JPEG |
+ AMD_PG_SUPPORT_GFX_PG;
+ adev->external_rev_id = adev->rev_id + 0x50;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -794,9 +817,9 @@ static int soc21_common_early_init(void *handle)
return 0;
}
-static int soc21_common_late_init(void *handle)
+static int soc21_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
@@ -832,9 +855,9 @@ static int soc21_common_late_init(void *handle)
return 0;
}
-static int soc21_common_sw_init(void *handle)
+static int soc21_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -842,14 +865,9 @@ static int soc21_common_sw_init(void *handle)
return 0;
}
-static int soc21_common_sw_fini(void *handle)
+static int soc21_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int soc21_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable aspm */
soc21_program_aspm(adev);
@@ -867,9 +885,9 @@ static int soc21_common_hw_init(void *handle)
return 0;
}
-static int soc21_common_hw_fini(void *handle)
+static int soc21_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc21_enable_doorbell_aperture
@@ -890,11 +908,9 @@ static int soc21_common_hw_fini(void *handle)
return 0;
}
-static int soc21_common_suspend(void *handle)
+static int soc21_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc21_common_hw_fini(adev);
+ return soc21_common_hw_fini(ip_block);
}
static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
@@ -904,9 +920,10 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
/* Will reset for the following suspend abort cases.
* 1) Only reset dGPU side.
* 2) S3 suspend got aborted and TOS is active.
+ * As for dGPU suspend abort cases the SOL value
+ * will be kept as zero at this resume point.
*/
- if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
- !adev->suspend_complete) {
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3) {
sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
msleep(100);
sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
@@ -917,37 +934,27 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
return false;
}
-static int soc21_common_resume(void *handle)
+static int soc21_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (soc21_need_reset_on_resume(adev)) {
dev_info(adev->dev, "S3 suspend aborted, resetting...");
soc21_asic_reset(adev);
}
- return soc21_common_hw_init(adev);
+ return soc21_common_hw_init(ip_block);
}
-static bool soc21_common_is_idle(void *handle)
+static bool soc21_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc21_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc21_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int soc21_common_set_clockgating_state(void *handle,
+static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(4, 3, 0):
@@ -956,6 +963,7 @@ static int soc21_common_set_clockgating_state(void *handle,
case IP_VERSION(7, 7, 1):
case IP_VERSION(7, 11, 0):
case IP_VERSION(7, 11, 1):
+ case IP_VERSION(7, 11, 2):
case IP_VERSION(7, 11, 3):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
@@ -970,10 +978,10 @@ static int soc21_common_set_clockgating_state(void *handle,
return 0;
}
-static int soc21_common_set_powergating_state(void *handle,
+static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(6, 0, 0):
@@ -988,9 +996,9 @@ static int soc21_common_set_powergating_state(void *handle,
return 0;
}
-static void soc21_common_get_clockgating_state(void *handle, u64 *flags)
+static void soc21_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->get_clockgating_state(adev, flags);
@@ -1002,17 +1010,12 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = {
.early_init = soc21_common_early_init,
.late_init = soc21_common_late_init,
.sw_init = soc21_common_sw_init,
- .sw_fini = soc21_common_sw_fini,
.hw_init = soc21_common_hw_init,
.hw_fini = soc21_common_hw_fini,
.suspend = soc21_common_suspend,
.resume = soc21_common_resume,
.is_idle = soc21_common_is_idle,
- .wait_for_idle = soc21_common_wait_for_idle,
- .soft_reset = soc21_common_soft_reset,
.set_clockgating_state = soc21_common_set_clockgating_state,
.set_powergating_state = soc21_common_set_powergating_state,
.get_clockgating_state = soc21_common_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c
index fd4c3d4f8387..972b449ab89f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc24.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc24.c
@@ -26,7 +26,6 @@
#include <linux/pci.h>
#include "amdgpu.h"
-#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
@@ -48,7 +47,7 @@
static const struct amd_ip_funcs soc24_common_ip_funcs;
static const struct amdgpu_video_codec_info vcn_5_0_0_video_codecs_encode_array_vcn0[] = {
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -363,9 +362,9 @@ static const struct amdgpu_asic_funcs soc24_asic_funcs = {
.update_umd_stable_pstate = &soc24_update_umd_stable_pstate,
};
-static int soc24_common_early_init(void *handle)
+static int soc24_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->set_reg_remap(adev);
adev->smc_rreg = NULL;
@@ -440,12 +439,22 @@ static int soc24_common_early_init(void *handle)
return 0;
}
-static int soc24_common_late_init(void *handle)
+static int soc24_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_get_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ /* don't need to fail gpu late init
+ * if enabling athub_err_event interrupt failed
+ * nbif v6_3_1 only support fatal error hanlding
+ * just enable the interrupt directly
+ */
+ amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
/* Enable selfring doorbell aperture late because doorbell BAR
* aperture will change if resize BAR successfully in gmc sw_init.
@@ -455,9 +464,9 @@ static int soc24_common_late_init(void *handle)
return 0;
}
-static int soc24_common_sw_init(void *handle)
+static int soc24_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_nv_mailbox_add_irq_id(adev);
@@ -465,14 +474,9 @@ static int soc24_common_sw_init(void *handle)
return 0;
}
-static int soc24_common_sw_fini(void *handle)
+static int soc24_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- return 0;
-}
-
-static int soc24_common_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable aspm */
soc24_program_aspm(adev);
@@ -494,9 +498,9 @@ static int soc24_common_hw_init(void *handle)
return 0;
}
-static int soc24_common_hw_fini(void *handle)
+static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* Disable the doorbell aperture and selfring doorbell aperture
* separately in hw_fini because soc21_enable_doorbell_aperture
@@ -506,45 +510,36 @@ static int soc24_common_hw_fini(void *handle)
adev->nbio.funcs->enable_doorbell_aperture(adev, false);
adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false);
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
xgpu_nv_mailbox_put_irq(adev);
+ } else {
+ if (adev->nbio.ras &&
+ adev->nbio.ras_err_event_athub_irq.funcs)
+ amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+ }
return 0;
}
-static int soc24_common_suspend(void *handle)
+static int soc24_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc24_common_hw_fini(adev);
+ return soc24_common_hw_fini(ip_block);
}
-static int soc24_common_resume(void *handle)
+static int soc24_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return soc24_common_hw_init(adev);
+ return soc24_common_hw_init(ip_block);
}
-static bool soc24_common_is_idle(void *handle)
+static bool soc24_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int soc24_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int soc24_common_soft_reset(void *handle)
-{
- return 0;
-}
-
-static int soc24_common_set_clockgating_state(void *handle,
+static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
case IP_VERSION(6, 3, 1):
@@ -561,10 +556,10 @@ static int soc24_common_set_clockgating_state(void *handle,
return 0;
}
-static int soc24_common_set_powergating_state(void *handle,
+static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) {
case IP_VERSION(7, 0, 0):
@@ -579,9 +574,9 @@ static int soc24_common_set_powergating_state(void *handle,
return 0;
}
-static void soc24_common_get_clockgating_state(void *handle, u64 *flags)
+static void soc24_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->nbio.funcs->get_clockgating_state(adev, flags);
@@ -595,14 +590,11 @@ static const struct amd_ip_funcs soc24_common_ip_funcs = {
.early_init = soc24_common_early_init,
.late_init = soc24_common_late_init,
.sw_init = soc24_common_sw_init,
- .sw_fini = soc24_common_sw_fini,
.hw_init = soc24_common_hw_init,
.hw_fini = soc24_common_hw_fini,
.suspend = soc24_common_suspend,
.resume = soc24_common_resume,
.is_idle = soc24_common_is_idle,
- .wait_for_idle = soc24_common_wait_for_idle,
- .soft_reset = soc24_common_soft_reset,
.set_clockgating_state = soc24_common_set_clockgating_state,
.set_powergating_state = soc24_common_set_powergating_state,
.get_clockgating_state = soc24_common_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 3ac56a9645eb..8a3f326474e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -30,6 +30,9 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
+/* invalid node instance value */
+#define TA_RAS_INV_NODE 0xffff
+
/* RAS related enumerations */
/**********************************************************/
enum ras_command {
@@ -89,6 +92,9 @@ enum ta_ras_block {
TA_RAS_BLOCK__MCA,
TA_RAS_BLOCK__VCN,
TA_RAS_BLOCK__JPEG,
+ TA_RAS_BLOCK__IH,
+ TA_RAS_BLOCK__MPIO,
+ TA_RAS_BLOCK__MMSCH,
TA_NUM_BLOCK_MAX
};
@@ -113,6 +119,14 @@ enum ta_ras_address_type {
TA_RAS_PA_TO_MCA,
};
+enum ta_ras_nps_mode {
+ TA_RAS_UNKNOWN_MODE = 0,
+ TA_RAS_NPS1_MODE = 1,
+ TA_RAS_NPS2_MODE = 2,
+ TA_RAS_NPS4_MODE = 4,
+ TA_RAS_NPS8_MODE = 8,
+};
+
/* Input/output structures for RAS commands */
/**********************************************************/
@@ -139,6 +153,8 @@ struct ta_ras_init_flags {
uint8_t dgpu_mode;
uint16_t xcc_mask;
uint8_t channel_dis_num;
+ uint8_t nps_mode;
+ uint32_t active_umc_mask;
};
struct ta_ras_mca_addr {
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
index 00d8bdb8254f..9ec2e03d41c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h
@@ -31,10 +31,12 @@
* Secure Display Command ID
*/
enum ta_securedisplay_command {
- /* Query whether TA is responding used only for validation purpose */
+ /* Query whether TA is responding. It is used only for validation purpose */
TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1,
/* Send region of Interest and CRC value to I2C */
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2,
+ /* V2 to send multiple regions of Interest and CRC value to I2C */
+ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3,
/* Maximum Command ID */
TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF,
};
@@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret {
enum ta_securedisplay_buffer_size {
/* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */
TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15,
+ /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */
+ TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16,
};
/** Input/output structures for Secure Display commands */
@@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size {
* Physical ID to determine which DIO scratch register should be used to get ROI
*/
struct ta_securedisplay_send_roi_crc_input {
- uint32_t phy_id; /* Physical ID */
+ /* Physical ID */
+ uint32_t phy_id;
+};
+
+struct ta_securedisplay_send_roi_crc_v2_input {
+ /* Physical ID */
+ uint32_t phy_id;
+ /* Region of interest index */
+ uint8_t roi_idx;
};
/** @union ta_securedisplay_cmd_input
@@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input {
union ta_securedisplay_cmd_input {
/* send ROI and CRC input buffer format */
struct ta_securedisplay_send_roi_crc_input send_roi_crc;
+ /* send ROI and CRC input buffer format, v2 adds a ROI index */
+ struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2;
uint32_t reserved[4];
};
@@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output {
uint8_t reserved;
};
+struct ta_securedisplay_send_roi_crc_v2_output {
+ uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */
+};
+
/** @union ta_securedisplay_cmd_output
* Output buffer
*/
@@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output {
struct ta_securedisplay_query_ta_output query_ta;
/* Send ROI CRC output buffer format used only for validation purpose */
struct ta_securedisplay_send_roi_crc_output send_roi_crc;
+ /* Send ROI CRC output buffer format used only for validation purpose */
+ struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2;
uint32_t reserved[4];
};
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 24d49d813607..7d17ae56f901 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -283,9 +283,9 @@ static void tonga_ih_set_rptr(struct amdgpu_device *adev,
}
}
-static int tonga_ih_early_init(void *handle)
+static int tonga_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = amdgpu_irq_add_domain(adev);
@@ -297,10 +297,10 @@ static int tonga_ih_early_init(void *handle)
return 0;
}
-static int tonga_ih_sw_init(void *handle)
+static int tonga_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
if (r)
@@ -314,9 +314,9 @@ static int tonga_ih_sw_init(void *handle)
return r;
}
-static int tonga_ih_sw_fini(void *handle)
+static int tonga_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
amdgpu_irq_remove_domain(adev);
@@ -324,10 +324,10 @@ static int tonga_ih_sw_fini(void *handle)
return 0;
}
-static int tonga_ih_hw_init(void *handle)
+static int tonga_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = tonga_ih_irq_init(adev);
if (r)
@@ -336,32 +336,26 @@ static int tonga_ih_hw_init(void *handle)
return 0;
}
-static int tonga_ih_hw_fini(void *handle)
+static int tonga_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- tonga_ih_irq_disable(adev);
+ tonga_ih_irq_disable(ip_block->adev);
return 0;
}
-static int tonga_ih_suspend(void *handle)
+static int tonga_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_resume(void *handle)
+static int tonga_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static bool tonga_ih_is_idle(void *handle)
+static bool tonga_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 tmp = RREG32(mmSRBM_STATUS);
if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY))
@@ -370,11 +364,11 @@ static bool tonga_ih_is_idle(void *handle)
return true;
}
-static int tonga_ih_wait_for_idle(void *handle)
+static int tonga_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
u32 tmp;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
/* read MC_STATUS */
@@ -386,9 +380,9 @@ static int tonga_ih_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static bool tonga_ih_check_soft_reset(void *handle)
+static bool tonga_ih_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -405,29 +399,27 @@ static bool tonga_ih_check_soft_reset(void *handle)
}
}
-static int tonga_ih_pre_soft_reset(void *handle)
+static int tonga_ih_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->irq.srbm_soft_reset)
+ if (!ip_block->adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_fini(adev);
+ return tonga_ih_hw_fini(ip_block);
}
-static int tonga_ih_post_soft_reset(void *handle)
+static int tonga_ih_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->irq.srbm_soft_reset)
return 0;
- return tonga_ih_hw_init(adev);
+ return tonga_ih_hw_init(ip_block);
}
-static int tonga_ih_soft_reset(void *handle)
+static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->irq.srbm_soft_reset)
@@ -456,13 +448,13 @@ static int tonga_ih_soft_reset(void *handle)
return 0;
}
-static int tonga_ih_set_clockgating_state(void *handle,
+static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int tonga_ih_set_powergating_state(void *handle,
+static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -471,7 +463,6 @@ static int tonga_ih_set_powergating_state(void *handle,
static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.name = "tonga_ih",
.early_init = tonga_ih_early_init,
- .late_init = NULL,
.sw_init = tonga_ih_sw_init,
.sw_fini = tonga_ih_sw_fini,
.hw_init = tonga_ih_hw_init,
@@ -486,8 +477,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.post_soft_reset = tonga_ih_post_soft_reset,
.set_clockgating_state = tonga_ih_set_clockgating_state,
.set_powergating_state = tonga_ih_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs tonga_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 1a8ea834efa6..e590cbdd8de9 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -85,7 +85,8 @@ bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_sta
return (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1));
+ ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) ||
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison) == 1)));
}
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status)
@@ -173,156 +174,153 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
umc_v12_0_reset_error_count(adev);
}
-static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- struct ta_ras_query_address_input *addr_in)
+static void umc_v12_0_get_retire_flip_bits(struct amdgpu_device *adev)
{
- uint32_t col, row, row_xor, bank, channel_index;
- uint64_t soc_pa, retired_page, column, err_addr;
- struct ta_ras_query_address_output addr_out;
-
- err_addr = addr_in->ma.err_addr;
- addr_in->addr_type = TA_RAS_MCA_TO_PA;
- if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) {
- dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
- err_addr);
-
- return;
+ enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE;
+ uint32_t vram_type = adev->gmc.vram_type;
+ struct amdgpu_umc_flip_bits *flip_bits = &(adev->umc.flip_bits);
+
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
+ /* default setting */
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C3_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_C4_BIT;
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R13_BIT;
+ flip_bits->flip_row_bit = 13;
+ flip_bits->bit_num = 4;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R13_BIT;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_C2_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B1_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R12_BIT;
+ } else if (nps == AMDGPU_NPS4_PARTITION_MODE) {
+ flip_bits->flip_bits_in_pa[0] = UMC_V12_0_PA_CH4_BIT;
+ flip_bits->flip_bits_in_pa[1] = UMC_V12_0_PA_CH5_BIT;
+ flip_bits->flip_bits_in_pa[2] = UMC_V12_0_PA_B0_BIT;
+ flip_bits->r13_in_pa = UMC_V12_0_PA_R11_BIT;
}
- soc_pa = addr_out.pa.pa;
- bank = addr_out.pa.bank;
- channel_index = addr_out.pa.channel_idx;
-
- col = (err_addr >> 1) & 0x1fULL;
- row = (err_addr >> 10) & 0x3fffULL;
- row_xor = row ^ (0x1ULL << 13);
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
- /* include column bit 0 and 1 */
- col &= 0x3;
- col |= (column << 2);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, addr_in->ma.umc_inst);
-
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row_xor, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, addr_in->ma.umc_inst);
+ switch (vram_type) {
+ case AMDGPU_VRAM_TYPE_HBM:
+ /* other nps modes are taken as nps1 */
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+
+ break;
+ case AMDGPU_VRAM_TYPE_HBM3E:
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R12_BIT;
+ flip_bits->flip_row_bit = 12;
+
+ if (nps == AMDGPU_NPS2_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R11_BIT;
+ else if (nps == AMDGPU_NPS4_PARTITION_MODE)
+ flip_bits->flip_bits_in_pa[3] = UMC_V12_0_PA_R10_BIT;
+
+ break;
+ default:
+ dev_warn(adev->dev,
+ "Unknown HBM type, set RAS retire flip bits to the value in NPS1 mode.\n");
+ break;
}
-}
-static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev,
- struct ta_ras_query_address_output *addr_out,
- uint64_t err_addr)
-{
- uint32_t col, row, row_xor, bank, channel_index;
- uint64_t soc_pa, retired_page, column;
-
- soc_pa = addr_out->pa.pa;
- bank = addr_out->pa.bank;
- channel_index = addr_out->pa.channel_idx;
-
- col = (err_addr >> 1) & 0x1fULL;
- row = (err_addr >> 10) & 0x3fffULL;
- row_xor = row ^ (0x1ULL << 13);
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
- /* include column bit 0 and 1 */
- col &= 0x3;
- col |= (column << 2);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row, col, bank, channel_index);
-
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
- dev_info(adev->dev,
- "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
- retired_page, row_xor, col, bank, channel_index);
- }
+ adev->umc.retire_unit = 0x1 << flip_bits->bit_num;
}
-static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
- uint64_t pa_addr, uint64_t *pfns, int len)
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in,
+ struct ta_ras_query_address_output *addr_out,
+ bool dump_addr)
{
- uint64_t soc_pa, retired_page, column;
- uint32_t pos = 0;
-
- soc_pa = pa_addr;
- /* clear [C3 C2] in soc physical address */
- soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
- /* clear [C4] in soc physical address */
- soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
-
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
- retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
-
- if (pos >= len)
- return 0;
- pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ uint32_t col, col_lower, row, row_lower, row_high, bank;
+ uint32_t channel_index = 0, umc_inst = 0;
+ uint32_t i, bit_num, retire_unit, *flip_bits;
+ uint64_t soc_pa, column, err_addr;
+ struct ta_ras_query_address_output addr_out_tmp;
+ struct ta_ras_query_address_output *paddr_out;
+ int ret = 0;
+
+ if (!addr_out)
+ paddr_out = &addr_out_tmp;
+ else
+ paddr_out = addr_out;
- /* shift R13 bit */
- retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+ err_addr = bank = 0;
+ if (addr_in) {
+ err_addr = addr_in->ma.err_addr;
+ addr_in->addr_type = TA_RAS_MCA_TO_PA;
+ ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+ if (ret) {
+ dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
+ err_addr);
- if (pos >= len)
- return 0;
- pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ goto out;
+ }
+ bank = paddr_out->pa.bank;
+ /* no need to care about umc inst if addr_in is NULL */
+ umc_inst = addr_in->ma.umc_inst;
}
- return pos;
-}
-
-static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev,
- uint64_t err_addr, uint32_t ch, uint32_t umc,
- uint32_t node, uint32_t socket,
- uint64_t *addr, bool dump_addr)
-{
- struct ta_ras_query_address_input addr_in;
- struct ta_ras_query_address_output addr_out;
-
- memset(&addr_in, 0, sizeof(addr_in));
- addr_in.ma.err_addr = err_addr;
- addr_in.ma.ch_inst = ch;
- addr_in.ma.umc_inst = umc;
- addr_in.ma.node_inst = node;
- addr_in.ma.socket_id = socket;
- addr_in.addr_type = TA_RAS_MCA_TO_PA;
- if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) {
- dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
- err_addr);
- return -EINVAL;
+ flip_bits = adev->umc.flip_bits.flip_bits_in_pa;
+ bit_num = adev->umc.flip_bits.bit_num;
+ retire_unit = adev->umc.retire_unit;
+
+ soc_pa = paddr_out->pa.pa;
+ channel_index = paddr_out->pa.channel_idx;
+ /* clear loop bits in soc physical address */
+ for (i = 0; i < bit_num; i++)
+ soc_pa &= ~BIT_ULL(flip_bits[i]);
+
+ paddr_out->pa.pa = soc_pa;
+ /* get column bit 0 and 1 in mca address */
+ col_lower = (err_addr >> 1) & 0x3ULL;
+ /* extra row bit will be handled later */
+ row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL;
+ row_lower &= ~BIT_ULL(adev->umc.flip_bits.flip_row_bit);
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 5, 0)) {
+ row_high = (soc_pa >> adev->umc.flip_bits.r13_in_pa) & 0x3ULL;
+ /* it's 2.25GB in each channel, from MCA address to PA
+ * [R14 R13] is converted if the two bits value are 0x3,
+ * get them from PA instead of MCA address.
+ */
+ row_lower |= (row_high << 13);
}
- if (dump_addr)
- umc_v12_0_dump_addr_info(adev, &addr_out, err_addr);
-
- *addr = addr_out.pa.pa;
+ if (!err_data && !dump_addr)
+ goto out;
+
+ /* loop for all possibilities of retired bits */
+ for (column = 0; column < retire_unit; column++) {
+ soc_pa = paddr_out->pa.pa;
+ for (i = 0; i < bit_num; i++)
+ soc_pa |= (((column >> i) & 0x1ULL) << flip_bits[i]);
+
+ col = ((column & 0x7) << 2) | col_lower;
+ /* handle extra row bit */
+ if (bit_num == RETIRE_FLIP_BITS_NUM)
+ row = ((column >> 3) << adev->umc.flip_bits.flip_row_bit) |
+ row_lower;
+
+ if (dump_addr)
+ dev_info(adev->dev,
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+ soc_pa, row, col, bank, channel_index);
+
+ if (err_data)
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ soc_pa, channel_index, umc_inst);
+ }
- return 0;
+out:
+ return ret;
}
static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
@@ -374,7 +372,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
addr_in.ma.umc_inst = umc_inst;
addr_in.ma.node_inst = node_inst;
- umc_v12_0_convert_error_address(adev, err_data, &addr_in);
+ umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true);
}
/* clear umc status */
@@ -475,6 +473,7 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
err_type = ACA_ERROR_TYPE_CE;
else
return 0;
+ bank->aca_err_type = err_type;
ret = aca_bank_info_decode(bank, &info);
if (ret)
@@ -486,8 +485,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
bank->regs[ACA_REG_IDX_ADDR]);
ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
- count = ext_error_code == 0 ?
- ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
+ if (umc_v12_0_is_deferred_error(adev, status))
+ count = ext_error_code == 0 ?
+ adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
+ else
+ count = ext_error_code == 0 ?
+ ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
return aca_error_cache_log_bank_error(handle, &info, err_type, count);
}
@@ -526,6 +529,8 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
uint64_t err_addr, pa_addr = 0;
struct ras_ecc_err *ecc_err;
+ struct ta_ras_query_address_output addr_out;
+ uint32_t shift_bit = adev->umc.flip_bits.flip_bits_in_pa[2];
int count, ret, i;
hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
@@ -552,10 +557,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
MCA_IPID_2_UMC_CH(ipid),
err_addr);
- ret = umc_v12_0_convert_mca_to_addr(adev,
+ ret = amdgpu_umc_mca_to_addr(adev,
err_addr, MCA_IPID_2_UMC_CH(ipid),
MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid),
- MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true);
+ MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true);
if (ret)
return ret;
@@ -563,14 +568,16 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
if (!ecc_err)
return -ENOMEM;
+ pa_addr = addr_out.pa.pa;
ecc_err->status = status;
ecc_err->ipid = ipid;
ecc_err->addr = addr;
- ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->channel_idx = addr_out.pa.channel_idx;
/* If converted pa_pfn is 0, use pa C4 pfn. */
if (!ecc_err->pa_pfn)
- ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT;
+ ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT;
ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
if (ret) {
@@ -586,7 +593,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
con->umc_ecc_log.de_queried_count++;
memset(page_pfn, 0, sizeof(page_pfn));
- count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
pa_addr,
page_pfn, ARRAY_SIZE(page_pfn));
if (count <= 0) {
@@ -629,7 +636,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
return -EINVAL;
memset(page_pfn, 0, sizeof(page_pfn));
- count = umc_v12_0_lookup_bad_pages_in_a_row(adev,
+ count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT,
page_pfn, ARRAY_SIZE(page_pfn));
@@ -637,7 +644,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
ret = amdgpu_umc_fill_error_record(err_data,
ecc_err->addr,
page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
- MCA_IPID_2_UMC_CH(ecc_err->ipid),
+ ecc_err->channel_idx,
MCA_IPID_2_UMC_INST(ecc_err->ipid));
if (ret)
break;
@@ -676,6 +683,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
mutex_unlock(&con->umc_ecc_log.lock);
}
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+ uint64_t mca_addr, uint64_t retired_page)
+{
+ uint32_t die = 0;
+
+ /* we only calculate die id for nps1 mode right now */
+ die += ((((retired_page >> 12) & 0x1ULL)^
+ ((retired_page >> 20) & 0x1ULL) ^
+ ((retired_page >> 27) & 0x1ULL) ^
+ ((retired_page >> 34) & 0x1ULL) ^
+ ((retired_page >> 41) & 0x1ULL)) << 0);
+
+ /* the original PA_C4 and PA_R13 may be cleared in retired_page, so
+ * get them from mca_addr.
+ */
+ die += ((((retired_page >> 13) & 0x1ULL) ^
+ ((mca_addr >> 5) & 0x1ULL) ^
+ ((retired_page >> 28) & 0x1ULL) ^
+ ((mca_addr >> 23) & 0x1ULL) ^
+ ((retired_page >> 42) & 0x1ULL)) << 1);
+ die &= 3;
+
+ return die;
+}
+
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -686,5 +718,8 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
.update_ecc_status = umc_v12_0_update_ecc_status,
+ .convert_ras_err_addr = umc_v12_0_convert_error_address,
+ .get_die_id_from_pa = umc_v12_0_get_die_id,
+ .get_retire_flip_bits = umc_v12_0_get_retire_flip_bits,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index be5598d76c1d..63b7e7254526 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -58,9 +58,22 @@
/* column bits in SOC physical address */
#define UMC_V12_0_PA_C2_BIT 15
+#define UMC_V12_0_PA_C3_BIT 16
#define UMC_V12_0_PA_C4_BIT 21
/* row bits in SOC physical address */
+#define UMC_V12_0_PA_R0_BIT 22
+#define UMC_V12_0_PA_R10_BIT 32
+#define UMC_V12_0_PA_R11_BIT 33
+#define UMC_V12_0_PA_R12_BIT 34
#define UMC_V12_0_PA_R13_BIT 35
+/* channel bit in SOC physical address */
+#define UMC_V12_0_PA_CH4_BIT 12
+#define UMC_V12_0_PA_CH5_BIT 13
+/* bank bit in SOC physical address */
+#define UMC_V12_0_PA_B0_BIT 19
+#define UMC_V12_0_PA_B1_BIT 20
+/* row bits in MCA address */
+#define UMC_V12_0_MA_R0_BIT 10
#define MCA_UMC_HWID_V12_0 0x96
#define MCA_UMC_MCATYPE_V12_0 0x0
@@ -81,11 +94,6 @@
(((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03))
-#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \
- ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \
- (0x1ULL << UMC_V12_0_PA_C4_BIT) | \
- (0x1ULL << UMC_V12_0_PA_R13_BIT)))
-
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
new file mode 100644
index 000000000000..eaca10a3c4a9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_14.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_14_0_offset.h"
+#include "umc/umc_8_14_0_sh_mask.h"
+
+static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst;
+}
+
+static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_clear_error_count(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_clear_error_count_per_channel, NULL);
+}
+
+static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ /* UMC 8_14 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) -
+ UMC_V8_14_CE_CNT_INIT);
+}
+
+static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ umc_v8_14_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_14_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+
+ return 0;
+}
+
+static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_query_error_count_per_channel, ras_error_status);
+
+ umc_v8_14_clear_error_count(adev);
+}
+
+static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t node_inst, uint32_t umc_inst,
+ uint32_t ch_inst, void *data)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+ uint32_t umc_reg_offset =
+ get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst);
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT);
+
+ return 0;
+}
+
+static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev)
+{
+ amdgpu_umc_loop_channels(adev,
+ umc_v8_14_err_cnt_init_per_channel, NULL);
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_14_query_ras_error_count,
+};
+
+struct amdgpu_umc_ras umc_v8_14_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_14_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_14_err_cnt_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
new file mode 100644
index 000000000000..20a258f0017a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_14_H__
+#define __UMC_V8_14_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instance with memory map register access */
+#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2
+/* number of umc instance with memory map register access */
+#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num)
+
+/* Total channel instances for all available umc nodes */
+#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc)
+
+/* UMC register per channel offset */
+#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400
+
+#define UMC_V8_14_INST_DIST 0x40000
+
+/* EccErrCnt max value */
+#define UMC_V8_14_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UMC_V8_14_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD)
+
+extern struct amdgpu_umc_ras umc_v8_14_ras;
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 805d6662c88b..5dbaebb592b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
- * uvd_v3_1_ring_emit_fence - emit an fence & trap command
+ * uvd_v3_1_ring_emit_fence - emit a fence & trap command
*
* @ring: amdgpu_ring pointer
* @addr: address
@@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
uint64_t addr;
uint32_t size;
- /* programm the VCPU memory controller bits 0-27 */
+ /* program the VCPU memory controller bits 0-27 */
addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
@@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev)
/* Set the write pointer delay */
WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0);
- /* programm the 4GB memory segment for rptr and ring buffer */
+ /* Program the 4GB memory segment for rptr and ring buffer */
WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
(0x7 << 16) | (0x1 << 31));
@@ -531,9 +531,9 @@ static void uvd_v3_1_set_irq_funcs(struct amdgpu_device *adev)
}
-static int uvd_v3_1_early_init(void *handle)
+static int uvd_v3_1_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v3_1_set_ring_funcs(adev);
@@ -542,10 +542,10 @@ static int uvd_v3_1_early_init(void *handle)
return 0;
}
-static int uvd_v3_1_sw_init(void *handle)
+static int uvd_v3_1_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
void *ptr;
uint32_t ucode_len;
@@ -580,10 +580,10 @@ static int uvd_v3_1_sw_init(void *handle)
return r;
}
-static int uvd_v3_1_sw_fini(void *handle)
+static int uvd_v3_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -621,13 +621,13 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v3_1_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v3_1_hw_init(void *handle)
+static int uvd_v3_1_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -688,13 +688,13 @@ done:
/**
* uvd_v3_1_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v3_1_hw_fini(void *handle)
+static int uvd_v3_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -704,17 +704,17 @@ static int uvd_v3_1_hw_fini(void *handle)
return 0;
}
-static int uvd_v3_1_prepare_suspend(void *handle)
+static int uvd_v3_1_prepare_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return amdgpu_uvd_prepare_suspend(adev);
}
-static int uvd_v3_1_suspend(void *handle)
+static int uvd_v3_1_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -740,36 +740,35 @@ static int uvd_v3_1_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v3_1_hw_fini(adev);
+ r = uvd_v3_1_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v3_1_resume(void *handle)
+static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v3_1_hw_init(adev);
+ return uvd_v3_1_hw_init(ip_block);
}
-static bool uvd_v3_1_is_idle(void *handle)
+static bool uvd_v3_1_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v3_1_wait_for_idle(void *handle)
+static int uvd_v3_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -778,9 +777,9 @@ static int uvd_v3_1_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v3_1_soft_reset(void *handle)
+static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v3_1_stop(adev);
@@ -791,13 +790,13 @@ static int uvd_v3_1_soft_reset(void *handle)
return uvd_v3_1_start(adev);
}
-static int uvd_v3_1_set_clockgating_state(void *handle,
+static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v3_1_set_powergating_state(void *handle,
+static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -806,7 +805,6 @@ static int uvd_v3_1_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.name = "uvd_v3_1",
.early_init = uvd_v3_1_early_init,
- .late_init = NULL,
.sw_init = uvd_v3_1_sw_init,
.sw_fini = uvd_v3_1_sw_fini,
.hw_init = uvd_v3_1_hw_init,
@@ -819,8 +817,6 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.soft_reset = uvd_v3_1_soft_reset,
.set_clockgating_state = uvd_v3_1_set_clockgating_state,
.set_powergating_state = uvd_v3_1_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 3f19c606f4de..4b96fd583772 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v4_2_start(struct amdgpu_device *adev);
static void uvd_v4_2_stop(struct amdgpu_device *adev);
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
bool sw_mode);
@@ -90,9 +90,9 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v4_2_early_init(void *handle)
+static int uvd_v4_2_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v4_2_set_ring_funcs(adev);
@@ -101,10 +101,10 @@ static int uvd_v4_2_early_init(void *handle)
return 0;
}
-static int uvd_v4_2_sw_init(void *handle)
+static int uvd_v4_2_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -130,10 +130,10 @@ static int uvd_v4_2_sw_init(void *handle)
return r;
}
-static int uvd_v4_2_sw_fini(void *handle)
+static int uvd_v4_2_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -147,13 +147,13 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
/**
* uvd_v4_2_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v4_2_hw_init(void *handle)
+static int uvd_v4_2_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
@@ -202,13 +202,13 @@ done:
/**
* uvd_v4_2_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v4_2_hw_fini(void *handle)
+static int uvd_v4_2_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -218,17 +218,17 @@ static int uvd_v4_2_hw_fini(void *handle)
return 0;
}
-static int uvd_v4_2_prepare_suspend(void *handle)
+static int uvd_v4_2_prepare_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return amdgpu_uvd_prepare_suspend(adev);
}
-static int uvd_v4_2_suspend(void *handle)
+static int uvd_v4_2_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -254,23 +254,22 @@ static int uvd_v4_2_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v4_2_hw_fini(adev);
+ r = uvd_v4_2_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v4_2_resume(void *handle)
+static int uvd_v4_2_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v4_2_hw_init(adev);
+ return uvd_v4_2_hw_init(ip_block);
}
/**
@@ -303,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32(mmUVD_VCPU_CNTL, 1 << 9);
- /* disable interupt */
+ /* disable interrupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
#ifdef __BIG_ENDIAN
@@ -313,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
#endif
WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+
/* initialize UVD memory controller */
WREG32(mmUVD_LMI_CTRL, 0x203108);
@@ -659,17 +659,17 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev,
WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2);
}
-static bool uvd_v4_2_is_idle(void *handle)
+static bool uvd_v4_2_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v4_2_wait_for_idle(void *handle)
+static int uvd_v4_2_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -678,9 +678,9 @@ static int uvd_v4_2_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v4_2_soft_reset(void *handle)
+static int uvd_v4_2_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v4_2_stop(adev);
@@ -709,13 +709,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int uvd_v4_2_set_clockgating_state(void *handle,
+static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
return 0;
}
-static int uvd_v4_2_set_powergating_state(void *handle,
+static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -725,7 +725,7 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
@@ -756,7 +756,6 @@ static int uvd_v4_2_set_powergating_state(void *handle,
static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.name = "uvd_v4_2",
.early_init = uvd_v4_2_early_init,
- .late_init = NULL,
.sw_init = uvd_v4_2_sw_init,
.sw_fini = uvd_v4_2_sw_fini,
.hw_init = uvd_v4_2_hw_init,
@@ -769,8 +768,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.soft_reset = uvd_v4_2_soft_reset,
.set_clockgating_state = uvd_v4_2_set_clockgating_state,
.set_powergating_state = uvd_v4_2_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index efd903c21d48..71409ad8b7ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -88,9 +88,9 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
-static int uvd_v5_0_early_init(void *handle)
+static int uvd_v5_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
uvd_v5_0_set_ring_funcs(adev);
@@ -99,10 +99,10 @@ static int uvd_v5_0_early_init(void *handle)
return 0;
}
-static int uvd_v5_0_sw_init(void *handle)
+static int uvd_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
/* UVD TRAP */
@@ -128,10 +128,10 @@ static int uvd_v5_0_sw_init(void *handle)
return r;
}
-static int uvd_v5_0_sw_fini(void *handle)
+static int uvd_v5_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -143,19 +143,19 @@ static int uvd_v5_0_sw_fini(void *handle)
/**
* uvd_v5_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v5_0_hw_init(void *handle)
+static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v5_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -200,13 +200,13 @@ done:
/**
* uvd_v5_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v5_0_hw_fini(void *handle)
+static int uvd_v5_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -216,17 +216,17 @@ static int uvd_v5_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v5_0_prepare_suspend(void *handle)
+static int uvd_v5_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return amdgpu_uvd_prepare_suspend(adev);
}
-static int uvd_v5_0_suspend(void *handle)
+static int uvd_v5_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -252,23 +252,22 @@ static int uvd_v5_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v5_0_hw_fini(adev);
+ r = uvd_v5_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v5_0_resume(void *handle)
+static int uvd_v5_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v5_0_hw_init(adev);
+ return uvd_v5_0_hw_init(ip_block);
}
/**
@@ -581,17 +580,17 @@ static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
}
}
-static bool uvd_v5_0_is_idle(void *handle)
+static bool uvd_v5_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v5_0_wait_for_idle(void *handle)
+static int uvd_v5_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK))
@@ -600,9 +599,9 @@ static int uvd_v5_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
-static int uvd_v5_0_soft_reset(void *handle)
+static int uvd_v5_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
uvd_v5_0_stop(adev);
@@ -791,15 +790,15 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v5_0_set_clockgating_state(void *handle,
+static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v5_0_wait_for_idle(handle))
+ if (uvd_v5_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v5_0_enable_clock_gating(adev, true);
@@ -813,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v5_0_set_powergating_state(void *handle,
+static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -823,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -838,9 +837,9 @@ out:
return ret;
}
-static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags)
+static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -863,7 +862,6 @@ out:
static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.name = "uvd_v5_0",
.early_init = uvd_v5_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v5_0_sw_init,
.sw_fini = uvd_v5_0_sw_fini,
.hw_init = uvd_v5_0_hw_init,
@@ -877,8 +875,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.set_clockgating_state = uvd_v5_0_set_clockgating_state,
.set_powergating_state = uvd_v5_0_set_powergating_state,
.get_clockgating_state = uvd_v5_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 495de5068455..1c07b701d0e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
bool enable);
@@ -354,9 +354,9 @@ error:
return r;
}
-static int uvd_v6_0_early_init(void *handle)
+static int uvd_v6_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->uvd.num_uvd_inst = 1;
if (!(adev->flags & AMD_IS_APU) &&
@@ -375,11 +375,11 @@ static int uvd_v6_0_early_init(void *handle)
return 0;
}
-static int uvd_v6_0_sw_init(void *handle)
+static int uvd_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* UVD TRAP */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
@@ -435,10 +435,10 @@ static int uvd_v6_0_sw_init(void *handle)
return r;
}
-static int uvd_v6_0_sw_fini(void *handle)
+static int uvd_v6_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -455,19 +455,19 @@ static int uvd_v6_0_sw_fini(void *handle)
/**
* uvd_v6_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v6_0_hw_init(void *handle)
+static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->uvd.inst->ring;
uint32_t tmp;
int i, r;
amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
- uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE);
uvd_v6_0_enable_mgcg(adev, true);
r = amdgpu_ring_test_helper(ring);
@@ -524,13 +524,13 @@ done:
/**
* uvd_v6_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v6_0_hw_fini(void *handle)
+static int uvd_v6_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -540,17 +540,17 @@ static int uvd_v6_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v6_0_prepare_suspend(void *handle)
+static int uvd_v6_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return amdgpu_uvd_prepare_suspend(adev);
}
-static int uvd_v6_0_suspend(void *handle)
+static int uvd_v6_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -576,23 +576,22 @@ static int uvd_v6_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v6_0_hw_fini(adev);
+ r = uvd_v6_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v6_0_resume(void *handle)
+static int uvd_v6_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v6_0_hw_init(adev);
+ return uvd_v6_0_hw_init(ip_block);
}
/**
@@ -1144,29 +1143,29 @@ static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, vmid);
}
-static bool uvd_v6_0_is_idle(void *handle)
+static bool uvd_v6_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
}
-static int uvd_v6_0_wait_for_idle(void *handle)
+static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v6_0_is_idle(handle))
+ if (uvd_v6_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
}
#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v6_0_check_soft_reset(void *handle)
+static bool uvd_v6_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
u32 tmp = RREG32(mmSRBM_STATUS);
@@ -1184,9 +1183,9 @@ static bool uvd_v6_0_check_soft_reset(void *handle)
}
}
-static int uvd_v6_0_pre_soft_reset(void *handle)
+static int uvd_v6_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1195,9 +1194,9 @@ static int uvd_v6_0_pre_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_soft_reset(void *handle)
+static int uvd_v6_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->uvd.inst->srbm_soft_reset)
@@ -1226,9 +1225,9 @@ static int uvd_v6_0_soft_reset(void *handle)
return 0;
}
-static int uvd_v6_0_post_soft_reset(void *handle)
+static int uvd_v6_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->uvd.inst->srbm_soft_reset)
return 0;
@@ -1451,15 +1450,15 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
}
}
-static int uvd_v6_0_set_clockgating_state(void *handle,
+static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (uvd_v6_0_wait_for_idle(handle))
+ if (uvd_v6_0_wait_for_idle(ip_block))
return -EBUSY;
uvd_v6_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
@@ -1472,7 +1471,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
}
-static int uvd_v6_0_set_powergating_state(void *handle,
+static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the UVD block.
@@ -1482,7 +1481,7 @@ static int uvd_v6_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
@@ -1499,9 +1498,9 @@ out:
return ret;
}
-static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags)
+static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -1528,7 +1527,6 @@ out:
static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.name = "uvd_v6_0",
.early_init = uvd_v6_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v6_0_sw_init,
.sw_fini = uvd_v6_0_sw_fini,
.hw_init = uvd_v6_0_hw_init,
@@ -1545,8 +1543,6 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.set_clockgating_state = uvd_v6_0_set_clockgating_state,
.set_powergating_state = uvd_v6_0_set_powergating_state,
.get_clockgating_state = uvd_v6_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 6068b784dc69..9d237b5937fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -361,9 +361,9 @@ error:
return r;
}
-static int uvd_v7_0_early_init(void *handle)
+static int uvd_v7_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->asic_type == CHIP_VEGA20) {
u32 harvest;
@@ -395,12 +395,12 @@ static int uvd_v7_0_early_init(void *handle)
return 0;
}
-static int uvd_v7_0_sw_init(void *handle)
+static int uvd_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
@@ -487,10 +487,10 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
}
-static int uvd_v7_0_sw_fini(void *handle)
+static int uvd_v7_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, j, r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_virt_free_mm_table(adev);
@@ -510,13 +510,13 @@ static int uvd_v7_0_sw_fini(void *handle)
/**
* uvd_v7_0_hw_init - start and test UVD block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int uvd_v7_0_hw_init(void *handle)
+static int uvd_v7_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
uint32_t tmp;
int i, j, r;
@@ -588,13 +588,13 @@ done:
/**
* uvd_v7_0_hw_fini - stop the hardware block
*
- * @handle: handle used to pass amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the UVD block, mark ring as not ready any more
*/
-static int uvd_v7_0_hw_fini(void *handle)
+static int uvd_v7_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->uvd.idle_work);
@@ -608,17 +608,17 @@ static int uvd_v7_0_hw_fini(void *handle)
return 0;
}
-static int uvd_v7_0_prepare_suspend(void *handle)
+static int uvd_v7_0_prepare_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return amdgpu_uvd_prepare_suspend(adev);
}
-static int uvd_v7_0_suspend(void *handle)
+static int uvd_v7_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -644,23 +644,22 @@ static int uvd_v7_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = uvd_v7_0_hw_fini(adev);
+ r = uvd_v7_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_uvd_suspend(adev);
}
-static int uvd_v7_0_resume(void *handle)
+static int uvd_v7_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_uvd_resume(adev);
+ r = amdgpu_uvd_resume(ip_block->adev);
if (r)
return r;
- return uvd_v7_0_hw_init(adev);
+ return uvd_v7_0_hw_init(ip_block);
}
/**
@@ -1289,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
unsigned i;
/* No patching necessary for the first instance */
@@ -1463,104 +1462,6 @@ static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
-#if 0
-static bool uvd_v7_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK);
-}
-
-static int uvd_v7_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++) {
- if (uvd_v7_0_is_idle(handle))
- return 0;
- }
- return -ETIMEDOUT;
-}
-
-#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd
-static bool uvd_v7_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
- u32 tmp = RREG32(mmSRBM_STATUS);
-
- if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
- REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
- (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
- AMDGPU_UVD_STATUS_BUSY_MASK))
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
- SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
- if (srbm_soft_reset) {
- adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->uvd.inst[ring->me].srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int uvd_v7_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- uvd_v7_0_stop(adev);
- return 0;
-}
-
-static int uvd_v7_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int uvd_v7_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->uvd.inst[ring->me].srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return uvd_v7_0_start(adev);
-}
-#endif
-
static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1610,172 +1511,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-#if 0
-static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, data2, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
- data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
-
- data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
- UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
- (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
- (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
-
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
- UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
- UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
- UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
- UVD_CGC_CTRL__SYS_MODE_MASK |
- UVD_CGC_CTRL__UDEC_MODE_MASK |
- UVD_CGC_CTRL__MPEG2_MODE_MASK |
- UVD_CGC_CTRL__REGS_MODE_MASK |
- UVD_CGC_CTRL__RBC_MODE_MASK |
- UVD_CGC_CTRL__LMI_MC_MODE_MASK |
- UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
- UVD_CGC_CTRL__IDCT_MODE_MASK |
- UVD_CGC_CTRL__MPRD_MODE_MASK |
- UVD_CGC_CTRL__MPC_MODE_MASK |
- UVD_CGC_CTRL__LBSI_MODE_MASK |
- UVD_CGC_CTRL__LRBBM_MODE_MASK |
- UVD_CGC_CTRL__WCB_MODE_MASK |
- UVD_CGC_CTRL__VCPU_MODE_MASK |
- UVD_CGC_CTRL__JPEG_MODE_MASK |
- UVD_CGC_CTRL__JPEG2_MODE_MASK |
- UVD_CGC_CTRL__SCPU_MODE_MASK);
- data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SIT_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
- UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
-}
-
-static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
-{
- uint32_t data, data1, cgc_flags, suvd_flags;
-
- data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
- data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
-
- cgc_flags = UVD_CGC_GATE__SYS_MASK |
- UVD_CGC_GATE__UDEC_MASK |
- UVD_CGC_GATE__MPEG2_MASK |
- UVD_CGC_GATE__RBC_MASK |
- UVD_CGC_GATE__LMI_MC_MASK |
- UVD_CGC_GATE__IDCT_MASK |
- UVD_CGC_GATE__MPRD_MASK |
- UVD_CGC_GATE__MPC_MASK |
- UVD_CGC_GATE__LBSI_MASK |
- UVD_CGC_GATE__LRBBM_MASK |
- UVD_CGC_GATE__UDEC_RE_MASK |
- UVD_CGC_GATE__UDEC_CM_MASK |
- UVD_CGC_GATE__UDEC_IT_MASK |
- UVD_CGC_GATE__UDEC_DB_MASK |
- UVD_CGC_GATE__UDEC_MP_MASK |
- UVD_CGC_GATE__WCB_MASK |
- UVD_CGC_GATE__VCPU_MASK |
- UVD_CGC_GATE__SCPU_MASK |
- UVD_CGC_GATE__JPEG_MASK |
- UVD_CGC_GATE__JPEG2_MASK;
-
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
-
- data |= cgc_flags;
- data1 |= suvd_flags;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
- WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
-}
-
-static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
-
- uvd_v7_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
- return 0;
-
- if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v7_0_set_sw_clock_gating(adev);
- } else {
- /* wait for STATUS to clear */
- if (uvd_v7_0_wait_for_idle(handle))
- return -EBUSY;
-
- /* enable HW gates because UVD is idle */
- /* uvd_v7_0_set_hw_clock_gating(adev); */
- }
-
- return 0;
-}
-
-static int uvd_v7_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- /* This doesn't actually powergate the UVD block.
- * That's done in the dpm code via the SMC. This
- * just re-inits the block as necessary. The actual
- * gating still happens in the dpm code. We should
- * revisit this when there is a cleaner line between
- * the smc and the hw blocks
- */
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
- return 0;
-
- WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
-
- if (state == AMD_PG_STATE_GATE) {
- uvd_v7_0_stop(adev);
- return 0;
- } else {
- return uvd_v7_0_start(adev);
- }
-}
-#endif
-
-static int uvd_v7_0_set_clockgating_state(void *handle,
+static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
@@ -1785,7 +1521,6 @@ static int uvd_v7_0_set_clockgating_state(void *handle,
const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
.name = "uvd_v7_0",
.early_init = uvd_v7_0_early_init,
- .late_init = NULL,
.sw_init = uvd_v7_0_sw_init,
.sw_fini = uvd_v7_0_sw_fini,
.hw_init = uvd_v7_0_hw_init,
@@ -1793,12 +1528,6 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = {
.prepare_suspend = uvd_v7_0_prepare_suspend,
.suspend = uvd_v7_0_suspend,
.resume = uvd_v7_0_resume,
- .is_idle = NULL /* uvd_v7_0_is_idle */,
- .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */,
- .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */,
- .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */,
- .soft_reset = NULL /* uvd_v7_0_soft_reset */,
- .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */,
.set_clockgating_state = uvd_v7_0_set_clockgating_state,
.set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 66fada199bda..bee3e904a6bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -201,20 +201,20 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v2_0_is_idle(void *handle)
+static bool vce_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
}
-static int vce_v2_0_wait_for_idle(void *handle)
+static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
unsigned i;
for (i = 0; i < adev->usec_timeout; i++) {
- if (vce_v2_0_is_idle(handle))
+ if (vce_v2_0_is_idle(ip_block))
return 0;
}
return -ETIMEDOUT;
@@ -274,15 +274,21 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
static int vce_v2_0_stop(struct amdgpu_device *adev)
{
+ struct amdgpu_ip_block *ip_block;
int i;
int status;
+
if (vce_v2_0_lmi_clean(adev)) {
- DRM_INFO("vce is not idle \n");
+ DRM_INFO("VCE is not idle \n");
return 0;
}
- if (vce_v2_0_wait_for_idle(adev)) {
+ ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
+ if (!ip_block)
+ return -EINVAL;
+
+ if (vce_v2_0_wait_for_idle(ip_block)) {
DRM_INFO("VCE is busy, Can't set clock gating");
return 0;
}
@@ -398,9 +404,9 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
}
}
-static int vce_v2_0_early_init(void *handle)
+static int vce_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->vce.num_rings = 2;
@@ -410,11 +416,11 @@ static int vce_v2_0_early_init(void *handle)
return 0;
}
-static int vce_v2_0_sw_init(void *handle)
+static int vce_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCE */
r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
@@ -444,10 +450,10 @@ static int vce_v2_0_sw_init(void *handle)
return r;
}
-static int vce_v2_0_sw_fini(void *handle)
+static int vce_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -456,10 +462,10 @@ static int vce_v2_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v2_0_hw_init(void *handle)
+static int vce_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
vce_v2_0_enable_mgcg(adev, true, false);
@@ -475,19 +481,17 @@ static int vce_v2_0_hw_init(void *handle)
return 0;
}
-static int vce_v2_0_hw_fini(void *handle)
+static int vce_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- cancel_delayed_work_sync(&adev->vce.idle_work);
+ cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
return 0;
}
-static int vce_v2_0_suspend(void *handle)
+static int vce_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
@@ -513,28 +517,27 @@ static int vce_v2_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v2_0_hw_fini(adev);
+ r = vce_v2_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v2_0_resume(void *handle)
+static int vce_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v2_0_hw_init(adev);
+ return vce_v2_0_hw_init(ip_block);
}
-static int vce_v2_0_soft_reset(void *handle)
+static int vce_v2_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
WREG32_FIELD(SRBM_SOFT_RESET, SOFT_RESET_VCE, 1);
mdelay(5);
@@ -575,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v2_0_set_clockgating_state(void *handle,
+static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
bool gate = false;
bool sw_cg = false;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_CG_STATE_GATE) {
gate = true;
@@ -593,7 +596,7 @@ static int vce_v2_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v2_0_set_powergating_state(void *handle,
+static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -603,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v2_0_stop(adev);
@@ -614,7 +617,6 @@ static int vce_v2_0_set_powergating_state(void *handle,
static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.name = "vce_v2_0",
.early_init = vce_v2_0_early_init,
- .late_init = NULL,
.sw_init = vce_v2_0_sw_init,
.sw_fini = vce_v2_0_sw_fini,
.hw_init = vce_v2_0_hw_init,
@@ -626,8 +628,6 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.soft_reset = vce_v2_0_soft_reset,
.set_clockgating_state = vce_v2_0_set_clockgating_state,
.set_powergating_state = vce_v2_0_set_powergating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 4bfba2931b08..708123899c41 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -64,8 +64,8 @@
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vce_v3_0_wait_for_idle(void *handle);
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state);
/**
* vce_v3_0_ring_get_rptr - get read pointer
@@ -396,9 +396,9 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
}
}
-static int vce_v3_0_early_init(void *handle)
+static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
@@ -415,9 +415,9 @@ static int vce_v3_0_early_init(void *handle)
return 0;
}
-static int vce_v3_0_sw_init(void *handle)
+static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int r, i;
@@ -453,10 +453,10 @@ static int vce_v3_0_sw_init(void *handle)
return r;
}
-static int vce_v3_0_sw_fini(void *handle)
+static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = amdgpu_vce_suspend(adev);
if (r)
@@ -465,10 +465,10 @@ static int vce_v3_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v3_0_hw_init(void *handle)
+static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vce_v3_0_override_vce_clock_gating(adev, true);
@@ -485,25 +485,25 @@ static int vce_v3_0_hw_init(void *handle)
return 0;
}
-static int vce_v3_0_hw_fini(void *handle)
+static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
- r = vce_v3_0_wait_for_idle(handle);
+ r = vce_v3_0_wait_for_idle(ip_block);
if (r)
return r;
vce_v3_0_stop(adev);
- return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
+ return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
}
-static int vce_v3_0_suspend(void *handle)
+static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Proper cleanups before halting the HW engine:
@@ -528,23 +528,22 @@ static int vce_v3_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v3_0_hw_fini(adev);
+ r = vce_v3_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v3_0_resume(void *handle)
+static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vce_resume(adev);
+ r = amdgpu_vce_resume(ip_block->adev);
if (r)
return r;
- return vce_v3_0_hw_init(adev);
+ return vce_v3_0_hw_init(ip_block);
}
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
@@ -598,9 +597,9 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}
-static bool vce_v3_0_is_idle(void *handle)
+static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 mask = 0;
mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
@@ -609,13 +608,13 @@ static bool vce_v3_0_is_idle(void *handle)
return !(RREG32(mmSRBM_STATUS2) & mask);
}
-static int vce_v3_0_wait_for_idle(void *handle)
+static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v3_0_is_idle(handle))
+ if (vce_v3_0_is_idle(ip_block))
return 0;
return -ETIMEDOUT;
@@ -627,9 +626,9 @@ static int vce_v3_0_wait_for_idle(void *handle)
#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-static bool vce_v3_0_check_soft_reset(void *handle)
+static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset = 0;
/* According to VCE team , we should use VCE_STATUS instead
@@ -668,9 +667,9 @@ static bool vce_v3_0_check_soft_reset(void *handle)
}
}
-static int vce_v3_0_soft_reset(void *handle)
+static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
u32 srbm_soft_reset;
if (!adev->vce.srbm_soft_reset)
@@ -699,29 +698,29 @@ static int vce_v3_0_soft_reset(void *handle)
return 0;
}
-static int vce_v3_0_pre_soft_reset(void *handle)
+static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_suspend(adev);
+ return vce_v3_0_suspend(ip_block);
}
-static int vce_v3_0_post_soft_reset(void *handle)
+static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (!adev->vce.srbm_soft_reset)
return 0;
mdelay(5);
- return vce_v3_0_resume(adev);
+ return vce_v3_0_resume(ip_block);
}
static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -761,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static int vce_v3_0_set_clockgating_state(void *handle,
+static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
int i;
@@ -802,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
return 0;
}
-static int vce_v3_0_set_powergating_state(void *handle,
+static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -812,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret = 0;
if (state == AMD_PG_STATE_GATE) {
@@ -829,9 +828,9 @@ out:
return ret;
}
-static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
+static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
mutex_lock(&adev->pm.mutex);
@@ -897,7 +896,6 @@ static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.name = "vce_v3_0",
.early_init = vce_v3_0_early_init,
- .late_init = NULL,
.sw_init = vce_v3_0_sw_init,
.sw_fini = vce_v3_0_sw_fini,
.hw_init = vce_v3_0_hw_init,
@@ -913,8 +911,6 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.set_clockgating_state = vce_v3_0_set_clockgating_state,
.set_powergating_state = vce_v3_0_set_powergating_state,
.get_clockgating_state = vce_v3_0_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 0748bf44c880..335bda64ff5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -407,9 +407,9 @@ static int vce_v4_0_stop(struct amdgpu_device *adev)
return 0;
}
-static int vce_v4_0_early_init(void *handle)
+static int vce_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
adev->vce.num_rings = 1;
@@ -422,9 +422,9 @@ static int vce_v4_0_early_init(void *handle)
return 0;
}
-static int vce_v4_0_sw_init(void *handle)
+static int vce_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
unsigned size;
@@ -493,10 +493,10 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
-static int vce_v4_0_sw_fini(void *handle)
+static int vce_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* free MM table */
amdgpu_virt_free_mm_table(adev);
@@ -513,10 +513,10 @@ static int vce_v4_0_sw_fini(void *handle)
return amdgpu_vce_sw_fini(adev);
}
-static int vce_v4_0_hw_init(void *handle)
+static int vce_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
int r, i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
r = vce_v4_0_sriov_start(adev);
@@ -536,14 +536,14 @@ static int vce_v4_0_hw_init(void *handle)
return 0;
}
-static int vce_v4_0_hw_fini(void *handle)
+static int vce_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
cancel_delayed_work_sync(&adev->vce.idle_work);
if (!amdgpu_sriov_vf(adev)) {
- /* vce_v4_0_wait_for_idle(handle); */
+ /* vce_v4_0_wait_for_idle(ip_block); */
vce_v4_0_stop(adev);
} else {
/* full access mode, so don't touch any VCE register */
@@ -553,9 +553,9 @@ static int vce_v4_0_hw_fini(void *handle)
return 0;
}
-static int vce_v4_0_suspend(void *handle)
+static int vce_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -594,16 +594,16 @@ static int vce_v4_0_suspend(void *handle)
AMD_CG_STATE_GATE);
}
- r = vce_v4_0_hw_fini(adev);
+ r = vce_v4_0_hw_fini(ip_block);
if (r)
return r;
return amdgpu_vce_suspend(adev);
}
-static int vce_v4_0_resume(void *handle)
+static int vce_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r, idx;
if (adev->vce.vcpu_bo == NULL)
@@ -624,7 +624,7 @@ static int vce_v4_0_resume(void *handle)
return r;
}
- return vce_v4_0_hw_init(adev);
+ return vce_v4_0_hw_init(ip_block);
}
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
@@ -684,281 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
-static int vce_v4_0_set_clockgating_state(void *handle,
+static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
/* needed for driver unload*/
return 0;
}
-#if 0
-static bool vce_v4_0_is_idle(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 mask = 0;
-
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
- mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
-
- return !(RREG32(mmSRBM_STATUS2) & mask);
-}
-
-static int vce_v4_0_wait_for_idle(void *handle)
-{
- unsigned i;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- for (i = 0; i < adev->usec_timeout; i++)
- if (vce_v4_0_is_idle(handle))
- return 0;
-
- return -ETIMEDOUT;
-}
-
-#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */
-#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */
-#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
- VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
-
-static bool vce_v4_0_check_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset = 0;
-
- /* According to VCE team , we should use VCE_STATUS instead
- * SRBM_STATUS.VCE_BUSY bit for busy status checking.
- * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
- * instance's registers are accessed
- * (0 for 1st instance, 10 for 2nd instance).
- *
- *VCE_STATUS
- *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB |
- *|----+----+-----------+----+----+----+----------+---------+----|
- *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0|
- *
- * VCE team suggest use bit 3--bit 6 for busy status check
- */
- mutex_lock(&adev->grbm_idx_mutex);
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
- if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
- srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
- }
- WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- if (srbm_soft_reset) {
- adev->vce.srbm_soft_reset = srbm_soft_reset;
- return true;
- } else {
- adev->vce.srbm_soft_reset = 0;
- return false;
- }
-}
-
-static int vce_v4_0_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 srbm_soft_reset;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
- srbm_soft_reset = adev->vce.srbm_soft_reset;
-
- if (srbm_soft_reset) {
- u32 tmp;
-
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- /* Wait a little for things to settle down */
- udelay(50);
- }
-
- return 0;
-}
-
-static int vce_v4_0_pre_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_suspend(adev);
-}
-
-
-static int vce_v4_0_post_soft_reset(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (!adev->vce.srbm_soft_reset)
- return 0;
-
- mdelay(5);
-
- return vce_v4_0_resume(adev);
-}
-
-static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
-{
- u32 tmp, data;
-
- tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
- if (override)
- data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
- else
- data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
-
- if (tmp != data)
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
-}
-
-static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
- bool gated)
-{
- u32 data;
-
- /* Set Override to disable Clock Gating */
- vce_v4_0_override_vce_clock_gating(adev, true);
-
- /* This function enables MGCG which is controlled by firmware.
- With the clocks in the gated state the core is still
- accessible but the firmware will throttle the clocks on the
- fly as necessary.
- */
- if (gated) {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data |= 0x1ff;
- data &= ~0xef0000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0x3ff000;
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x2;
- data &= ~0x00010000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data |= 0x37f;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- } else {
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
- data &= ~0x80010;
- data |= 0xe70008;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
- data |= 0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
- data |= 0x10000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
- data &= ~0xffc00000;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
-
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
- data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
- VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK |
- 0x8);
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
- }
- vce_v4_0_override_vce_clock_gating(adev, false);
-}
-
-static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
- else
- tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-static int vce_v4_0_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- bool enable = (state == AMD_CG_STATE_GATE);
- int i;
-
- if ((adev->asic_type == CHIP_POLARIS10) ||
- (adev->asic_type == CHIP_TONGA) ||
- (adev->asic_type == CHIP_FIJI))
- vce_v4_0_set_bypass_mode(adev, enable);
-
- if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
- return 0;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < 2; i++) {
- /* Program VCE Instance 0 or 1 if not harvested */
- if (adev->vce.harvest_config & (1 << i))
- continue;
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
-
- if (enable) {
- /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
- uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
-
- /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
- data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
- data &= ~(0xf | 0xff0);
- data |= ((0x0 << 0) | (0x04 << 4));
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
- }
-
- vce_v4_0_set_vce_sw_clock_gating(adev, enable);
- }
-
- WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
- mutex_unlock(&adev->grbm_idx_mutex);
-
- return 0;
-}
-#endif
-
-static int vce_v4_0_set_powergating_state(void *handle,
+static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCE block.
@@ -968,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (state == AMD_PG_STATE_GATE)
return vce_v4_0_stop(adev);
@@ -1076,19 +809,12 @@ static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
.name = "vce_v4_0",
.early_init = vce_v4_0_early_init,
- .late_init = NULL,
.sw_init = vce_v4_0_sw_init,
.sw_fini = vce_v4_0_sw_fini,
.hw_init = vce_v4_0_hw_init,
.hw_fini = vce_v4_0_hw_fini,
.suspend = vce_v4_0_suspend,
.resume = vce_v4_0_resume,
- .is_idle = NULL /* vce_v4_0_is_idle */,
- .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
- .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
- .soft_reset = NULL /* vce_v4_0_soft_reset */,
- .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
.set_clockgating_state = vce_v4_0_set_clockgating_state,
.set_powergating_state = vce_v4_0_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index ecdfbfefd66a..c74947705d77 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -81,13 +81,14 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = {
SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
};
-static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v1_0_idle_work_handler(struct work_struct *work);
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
@@ -95,40 +96,41 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring);
/**
* vcn_v1_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v1_0_early_init(void *handle)
+static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst[0].set_pg_state = vcn_v1_0_set_pg_state;
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
- jpeg_v1_0_early_init(handle);
+ jpeg_v1_0_early_init(ip_block);
- return amdgpu_vcn_early_init(adev);
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v1_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v1_0_sw_init(void *handle)
+static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
uint32_t *ptr;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
@@ -137,23 +139,23 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
if (r)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
/* Override the work func */
- adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler;
+ adev->vcn.inst[0].idle_work.work.func = vcn_v1_0_idle_work_handler;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -165,18 +167,18 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
- adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =
+ adev->vcn.inst[0].internal.scratch9 = adev->vcn.inst->external.scratch9 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =
+ adev->vcn.inst[0].internal.data0 = adev->vcn.inst->external.data0 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =
+ adev->vcn.inst[0].internal.data1 = adev->vcn.inst->external.data1 =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =
+ adev->vcn.inst[0].internal.cmd = adev->vcn.inst->external.cmd =
SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = adev->vcn.inst->external.nop =
+ adev->vcn.inst[0].internal.nop = adev->vcn.inst->external.nop =
SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
@@ -188,7 +190,7 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
if (amdgpu_vcnfw_log) {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
@@ -197,7 +199,7 @@ static int vcn_v1_0_sw_init(void *handle)
amdgpu_vcn_fwlog_init(adev->vcn.inst);
}
- r = jpeg_v1_0_sw_init(handle);
+ r = jpeg_v1_0_sw_init(ip_block);
/* Allocate memory for VCN IP Dump buffer */
ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
@@ -213,22 +215,22 @@ static int vcn_v1_0_sw_init(void *handle)
/**
* vcn_v1_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v1_0_sw_fini(void *handle)
+static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- jpeg_v1_0_sw_fini(handle);
+ jpeg_v1_0_sw_fini(ip_block);
- r = amdgpu_vcn_sw_fini(adev);
+ r = amdgpu_vcn_sw_fini(adev, 0);
kfree(adev->vcn.ip_dump);
@@ -238,13 +240,13 @@ static int vcn_v1_0_sw_fini(void *handle)
/**
* vcn_v1_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v1_0_hw_init(void *handle)
+static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
@@ -252,7 +254,7 @@ static int vcn_v1_0_hw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -268,20 +270,21 @@ static int vcn_v1_0_hw_init(void *handle)
/**
* vcn_v1_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v1_0_hw_fini(void *handle)
+static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
- vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
return 0;
@@ -290,27 +293,27 @@ static int vcn_v1_0_hw_fini(void *handle)
/**
* vcn_v1_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v1_0_suspend(void *handle)
+static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool idle_work_unexecuted;
- idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work);
+ idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
if (idle_work_unexecuted) {
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
}
- r = vcn_v1_0_hw_fini(adev);
+ r = vcn_v1_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
return r;
}
@@ -318,20 +321,19 @@ static int vcn_v1_0_suspend(void *handle)
/**
* vcn_v1_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v1_0_resume(void *handle)
+static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v1_0_hw_init(adev);
+ r = vcn_v1_0_hw_init(ip_block);
return r;
}
@@ -339,13 +341,14 @@ static int vcn_v1_0_resume(void *handle)
/**
* vcn_v1_0_mc_resume_spg_mode - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -410,9 +413,10 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config);
}
-static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -485,12 +489,13 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
/**
* vcn_v1_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
/* JPEG disable CGC */
@@ -611,12 +616,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
/* enable JPEG CGC */
@@ -680,8 +686,10 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* disable JPEG CGC */
@@ -734,8 +742,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
}
-static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -779,8 +788,9 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -823,12 +833,13 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
/**
* vcn_v1_0_start_spg_mode - start VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Setup and start the VCN block
*/
-static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -837,13 +848,13 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_disable_static_power_gating(adev);
+ vcn_1_0_disable_static_power_gating(vinst);
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/* disable clock gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
/* disable interupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -885,7 +896,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v1_0_mc_resume_spg_mode(adev);
+ vcn_v1_0_mc_resume_spg_mode(vinst);
WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10);
WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0,
@@ -998,11 +1009,17 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 0);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
uint32_t lmi_swap_cntl;
@@ -1010,7 +1027,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_1_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -1019,7 +1036,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
/* enable clock gating */
- vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 0);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1068,7 +1085,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
- vcn_v1_0_mc_resume_dpg_mode(adev);
+ vcn_v1_0_mc_resume_dpg_mode(vinst);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
@@ -1085,7 +1102,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_MASTINT_EN,
UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
- vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ vcn_v1_0_clock_gating_dpg_mode(vinst, 1);
/* setup mmUVD_LMI_CTRL */
WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL,
(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
@@ -1142,24 +1159,32 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
jpeg_v1_0_start(adev, 1);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_start(struct amdgpu_device *adev)
+static int vcn_v1_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+
return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ?
- vcn_v1_0_start_dpg_mode(adev) : vcn_v1_0_start_spg_mode(adev);
+ vcn_v1_0_start_dpg_mode(vinst) : vcn_v1_0_start_spg_mode(vinst);
}
/**
* vcn_v1_0_stop_spg_mode - stop VCN block
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* stop the VCN block
*/
-static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int tmp;
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
@@ -1199,13 +1224,20 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
- vcn_v1_0_enable_clock_gating(adev);
- vcn_1_0_enable_static_power_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
+ vcn_1_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
@@ -1234,24 +1266,32 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v1_0_stop(struct amdgpu_device *adev)
+static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- r = vcn_v1_0_stop_dpg_mode(adev);
+ r = vcn_v1_0_stop_dpg_mode(vinst);
else
- r = vcn_v1_0_stop_spg_mode(adev);
+ r = vcn_v1_0_stop_spg_mode(vinst);
return r;
}
-static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
int ret_code;
uint32_t reg_data = 0;
uint32_t reg_data2 = 0;
@@ -1377,16 +1417,16 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v1_0_is_idle(void *handle)
+static bool vcn_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v1_0_wait_for_idle(void *handle)
+static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1395,20 +1435,21 @@ static int vcn_v1_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v1_0_set_clockgating_state(void *handle,
+static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
bool enable = (state == AMD_CG_STATE_GATE);
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v1_0_is_idle(handle))
+ if (!vcn_v1_0_is_idle(ip_block))
return -EBUSY;
- vcn_v1_0_enable_clock_gating(adev);
+ vcn_v1_0_enable_clock_gating(vinst);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v1_0_disable_clock_gating(adev);
+ vcn_v1_0_disable_clock_gating(vinst);
}
return 0;
}
@@ -1800,8 +1841,8 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun
}
}
-static int vcn_v1_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1811,28 +1852,29 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v1_0_stop(adev);
+ ret = vcn_v1_0_stop(vinst);
else
- ret = vcn_v1_0_start(adev);
+ ret = vcn_v1_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
+
return ret;
}
static void vcn_v1_0_idle_work_handler(struct work_struct *work)
{
- struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, vcn.idle_work.work);
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1848,7 +1890,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(vcn_inst, &new_state);
}
fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec);
@@ -1857,21 +1899,21 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work)
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
} else {
- schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
}
static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
+ bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
- mutex_lock(&adev->vcn.vcn1_jpeg1_workaround);
+ mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround);
if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec))
DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n");
@@ -1887,7 +1929,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
@@ -1897,7 +1939,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
struct dpg_pause_state new_state;
unsigned int fences = 0, i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]);
if (fences)
@@ -1915,19 +1957,19 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- adev->vcn.pause_dpg_mode(adev, 0, &new_state);
+ adev->vcn.inst->pause_dpg_mode(adev->vcn.inst, &new_state);
}
}
void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
- mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround);
+ schedule_delayed_work(&ring->adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ mutex_unlock(&ring->adev->vcn.inst[0].vcn1_jpeg1_workaround);
}
-static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_1_0);
uint32_t inst_off, is_powered;
@@ -1957,9 +1999,9 @@ static void vcn_v1_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v1_0_dump_ip_state(void *handle)
+static void vcn_v1_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -1988,7 +2030,6 @@ static void vcn_v1_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.name = "vcn_v1_0",
.early_init = vcn_v1_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v1_0_sw_init,
.sw_fini = vcn_v1_0_sw_fini,
.hw_init = vcn_v1_0_hw_init,
@@ -1997,12 +2038,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.resume = vcn_v1_0_resume,
.is_idle = vcn_v1_0_is_idle,
.wait_for_idle = vcn_v1_0_wait_for_idle,
- .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
- .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
- .soft_reset = NULL /* vcn_v1_0_soft_reset */,
- .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
- .set_powergating_state = vcn_v1_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v1_0_dump_ip_state,
.print_ip_state = vcn_v1_0_print_ip_state,
};
@@ -2061,11 +2098,11 @@ static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t reg = amdgpu_ib_get_value(ib, i);
uint32_t val = amdgpu_ib_get_value(ib, i + 1);
- if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ if (reg == PACKET0(p->adev->vcn.inst[0].internal.data0, 0)) {
msg_lo = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.data1, 0)) {
msg_hi = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.cmd, 0)) {
r = vcn_v1_0_validate_bo(p, job,
((u64)msg_hi) << 32 | msg_lo);
if (r)
@@ -2150,7 +2187,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
}
@@ -2161,7 +2198,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 2;
adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index bfd067e2d2f1..148b651be7ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -39,6 +39,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
@@ -92,49 +93,50 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = {
static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_0_start_sriov(struct amdgpu_device *adev);
/**
* vcn_v2_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v2_0_early_init(void *handle)
+static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
- adev->vcn.num_enc_rings = 1;
+ adev->vcn.inst[0].num_enc_rings = 1;
else
- adev->vcn.num_enc_rings = 2;
+ adev->vcn.inst[0].num_enc_rings = 2;
+ adev->vcn.inst->set_pg_state = vcn_v2_0_set_pg_state;
vcn_v2_0_set_dec_ring_funcs(adev);
vcn_v2_0_set_enc_ring_funcs(adev);
vcn_v2_0_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ return amdgpu_vcn_early_init(adev, 0);
}
/**
* vcn_v2_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_0_sw_init(void *handle)
+static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, r;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0);
uint32_t *ptr;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
volatile struct amdgpu_fw_shared *fw_shared;
/* VCN DEC TRAP */
@@ -145,7 +147,7 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.inst->irq);
@@ -153,13 +155,13 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- r = amdgpu_vcn_sw_init(adev);
+ r = amdgpu_vcn_sw_init(adev, 0);
if (r)
return r;
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev, 0);
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(adev, 0);
if (r)
return r;
@@ -175,25 +177,25 @@ static int vcn_v2_0_sw_init(void *handle)
if (r)
return r;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[0].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst->ring_enc[i];
@@ -210,7 +212,7 @@ static int vcn_v2_0_sw_init(void *handle)
return r;
}
- adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+ adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
@@ -237,14 +239,14 @@ static int vcn_v2_0_sw_init(void *handle)
/**
* vcn_v2_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_0_sw_fini(void *handle)
+static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
int r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -254,11 +256,11 @@ static int vcn_v2_0_sw_fini(void *handle)
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(adev, 0);
if (r)
return r;
- r = amdgpu_vcn_sw_fini(adev);
+ r = amdgpu_vcn_sw_fini(adev, 0);
kfree(adev->vcn.ip_dump);
@@ -268,13 +270,13 @@ static int vcn_v2_0_sw_fini(void *handle)
/**
* vcn_v2_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_0_hw_init(void *handle)
+static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
int i, r;
@@ -292,7 +294,7 @@ static int vcn_v2_0_hw_init(void *handle)
if (amdgpu_sriov_vf(adev))
ring->sched.ready = false;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
ring = &adev->vcn.inst->ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -305,20 +307,21 @@ static int vcn_v2_0_hw_init(void *handle)
/**
* vcn_v2_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_0_hw_fini(void *handle)
+static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_vcn_inst *vinst = adev->vcn.inst;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ cancel_delayed_work_sync(&vinst->idle_work);
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
- vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
return 0;
}
@@ -326,20 +329,19 @@ static int vcn_v2_0_hw_fini(void *handle)
/**
* vcn_v2_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_0_suspend(void *handle)
+static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = vcn_v2_0_hw_fini(adev);
+ r = vcn_v2_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ r = amdgpu_vcn_suspend(ip_block->adev, 0);
return r;
}
@@ -347,20 +349,19 @@ static int vcn_v2_0_suspend(void *handle)
/**
* vcn_v2_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_0_resume(void *handle)
+static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_vcn_resume(adev);
+ r = amdgpu_vcn_resume(ip_block->adev, 0);
if (r)
return r;
- r = vcn_v2_0_hw_init(adev);
+ r = vcn_v2_0_hw_init(ip_block);
return r;
}
@@ -368,13 +369,14 @@ static int vcn_v2_0_resume(void *handle)
/**
* vcn_v2_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: Pointer to the VCN instance structure
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
if (amdgpu_sriov_vf(adev))
@@ -428,9 +430,11 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
-static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -527,12 +531,13 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
/**
* vcn_v2_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data;
if (amdgpu_sriov_vf(adev))
@@ -636,9 +641,10 @@ static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -687,12 +693,13 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -745,8 +752,9 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -794,8 +802,9 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
}
-static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t data = 0;
if (amdgpu_sriov_vf(adev))
@@ -836,13 +845,14 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
}
}
-static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_static_power_gating(vinst);
/* enable dynamic power gating mode */
tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
@@ -854,7 +864,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+ vcn_v2_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -903,7 +913,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+ vcn_v2_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET(
UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -968,11 +978,18 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
/* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_start(struct amdgpu_device *adev)
+static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr;
struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;
uint32_t rb_bufsz, tmp;
@@ -980,19 +997,19 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
int i, j, r;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, 0);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+ return vcn_v2_0_start_dpg_mode(vinst, adev->vcn.inst->indirect_sram);
- vcn_v2_0_disable_static_power_gating(adev);
+ vcn_v2_0_disable_static_power_gating(vinst);
/* set uvd status busy */
tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/*SW clock gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(vinst);
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
@@ -1036,7 +1053,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- vcn_v2_0_mc_resume(adev);
+ vcn_v2_0_mc_resume(vinst);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
@@ -1141,15 +1158,21 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v2_0_pause_dpg_mode(adev, 0, &state);
+ vcn_v2_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1171,16 +1194,22 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_0_stop(struct amdgpu_device *adev)
+static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
uint32_t tmp;
int r;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_0_stop_dpg_mode(adev);
+ r = vcn_v2_0_stop_dpg_mode(vinst);
if (r)
return r;
goto power_off;
@@ -1232,19 +1261,26 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev)
/* clear status */
WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
- vcn_v2_0_enable_clock_gating(adev);
- vcn_v2_0_enable_static_power_gating(adev);
+ vcn_v2_0_enable_clock_gating(vinst);
+ vcn_v2_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS);
power_off:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, 0);
return 0;
}
-static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code;
@@ -1319,16 +1355,16 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
return 0;
}
-static bool vcn_v2_0_is_idle(void *handle)
+static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
-static int vcn_v2_0_wait_for_idle(void *handle)
+static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int ret;
ret = SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
@@ -1337,10 +1373,10 @@ static int vcn_v2_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_0_set_clockgating_state(void *handle,
+static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
if (amdgpu_sriov_vf(adev))
@@ -1348,12 +1384,12 @@ static int vcn_v2_0_set_clockgating_state(void *handle,
if (enable) {
/* wait for STATUS to clear */
- if (!vcn_v2_0_is_idle(handle))
+ if (!vcn_v2_0_is_idle(ip_block))
return -EBUSY;
- vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_clock_gating(&adev->vcn.inst[0]);
} else {
/* disable HW gating and enable Sw gating */
- vcn_v2_0_disable_clock_gating(adev);
+ vcn_v2_0_disable_clock_gating(&adev->vcn.inst[0]);
}
return 0;
}
@@ -1423,9 +1459,9 @@ void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
}
@@ -1440,7 +1476,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[0].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
}
@@ -1460,7 +1496,7 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
WARN_ON(ring->wptr % 2 || count % 2);
for (i = 0; i < count / 2; i++) {
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.nop, 0));
amdgpu_ring_write(ring, 0);
}
}
@@ -1481,25 +1517,25 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
struct amdgpu_device *adev = ring->adev;
WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.context_id, 0));
amdgpu_ring_write(ring, seq);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
}
@@ -1522,14 +1558,14 @@ void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_vmid, 0));
amdgpu_ring_write(ring, vmid);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_low, 0));
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_high, 0));
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_size, 0));
amdgpu_ring_write(ring, ib->length_dw);
}
@@ -1538,16 +1574,16 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.gp_scratch8, 0));
amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
}
@@ -1572,13 +1608,13 @@ void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0));
amdgpu_ring_write(ring, reg << 2);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0));
amdgpu_ring_write(ring, val);
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
}
@@ -1779,9 +1815,9 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 4);
if (r)
return r;
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0));
amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
- amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -1798,8 +1834,8 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring)
}
-static int vcn_v2_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
/* This doesn't actually powergate the VCN block.
* That's done in the dpm code via the SMC. This
@@ -1809,23 +1845,24 @@ static int vcn_v2_0_set_powergating_state(void *handle,
* the smc and the hw blocks
*/
int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_0_stop(adev);
+ ret = vcn_v2_0_stop(vinst);
else
- ret = vcn_v2_0_start(adev);
+ ret = vcn_v2_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
+
return ret;
}
@@ -1864,7 +1901,7 @@ static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev,
adev->vcn.inst->ring_dec.wptr_old = 0;
vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) {
adev->vcn.inst->ring_enc[i].wptr = 0;
adev->vcn.inst->ring_enc[i].wptr_old = 0;
vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]);
@@ -1922,7 +1959,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
init_table += header->vcn_table_offset;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4);
MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT(
SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
@@ -1990,7 +2027,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (r = 0; r < adev->vcn.num_enc_rings; ++r) {
+ for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) {
ring = &adev->vcn.inst->ring_enc[r];
ring->wptr = 0;
MMSCH_V2_0_INSERT_DIRECT_WT(
@@ -2034,9 +2071,9 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev)
return vcn_v2_0_start_mmsch(adev, &adev->virt.mm_table);
}
-static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v2_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_0);
uint32_t inst_off, is_powered;
@@ -2066,9 +2103,9 @@ static void vcn_v2_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v2_0_dump_ip_state(void *handle)
+static void vcn_v2_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -2097,7 +2134,6 @@ static void vcn_v2_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.name = "vcn_v2_0",
.early_init = vcn_v2_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_0_sw_init,
.sw_fini = vcn_v2_0_sw_fini,
.hw_init = vcn_v2_0_hw_init,
@@ -2106,12 +2142,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.resume = vcn_v2_0_resume,
.is_idle = vcn_v2_0_is_idle,
.wait_for_idle = vcn_v2_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_0_set_clockgating_state,
- .set_powergating_state = vcn_v2_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v2_0_dump_ip_state,
.print_ip_state = vcn_v2_0_print_ip_state,
};
@@ -2184,7 +2216,7 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i)
adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
}
@@ -2195,7 +2227,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 1;
adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 04e9e806e318..58b527a6b795 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -39,6 +39,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
@@ -95,10 +96,10 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = {
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
@@ -107,22 +108,151 @@ static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN1
};
+static void vcn_v2_5_idle_work_handler(struct work_struct *work)
+{
+ struct amdgpu_vcn_inst *vcn_inst =
+ container_of(work, struct amdgpu_vcn_inst, idle_work.work);
+ struct amdgpu_device *adev = vcn_inst->adev;
+ unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
+ unsigned int i, j;
+ int r = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[i];
+
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < v->num_enc_rings; ++j)
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (fence[i] ||
+ unlikely(atomic_read(&v->dpg_enc_submission_cnt)))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ v->pause_dpg_mode(v, &new_state);
+ }
+
+ fence[i] += amdgpu_fence_count_emitted(&v->ring_dec);
+ fences += fence[i];
+
+ }
+
+ if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_GATE);
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (adev->vcn.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ false);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
+ adev->vcn.workload_profile_active = false;
+ }
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+ } else {
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
+ }
+}
+
+static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me];
+ int r = 0;
+
+ atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
+
+ cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
+
+ /* We can safely return early here because we've cancelled the
+ * the delayed work so there is no one else to set it to false
+ * and we don't care if someone else sets it to true.
+ */
+ if (adev->vcn.workload_profile_active)
+ goto pg_lock;
+
+ mutex_lock(&adev->vcn.workload_profile_mutex);
+ if (!adev->vcn.workload_profile_active) {
+ r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
+ true);
+ if (r)
+ dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
+ adev->vcn.workload_profile_active = true;
+ }
+ mutex_unlock(&adev->vcn.workload_profile_mutex);
+
+pg_lock:
+ mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
+ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+ AMD_PG_STATE_UNGATE);
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ !v->using_unified_queue) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+ atomic_inc(&v->dpg_enc_submission_cnt);
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ } else {
+ unsigned int fences = 0;
+ unsigned int i;
+
+ for (i = 0; i < v->num_enc_rings; ++i)
+ fences += amdgpu_fence_count_emitted(&v->ring_enc[i]);
+
+ if (fences || atomic_read(&v->dpg_enc_submission_cnt))
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+ }
+ v->pause_dpg_mode(v, &new_state);
+ }
+ mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock);
+}
+
+static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */
+ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC &&
+ !adev->vcn.inst[ring->me].using_unified_queue)
+ atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+ atomic_dec(&adev->vcn.inst[0].total_submission_cnt);
+
+ schedule_delayed_work(&adev->vcn.inst[0].idle_work,
+ VCN_IDLE_TIMEOUT);
+}
+
/**
* vcn_v2_5_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v2_5_early_init(void *handle)
+static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = 2;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
u32 harvest;
int i;
@@ -131,13 +261,12 @@ static int vcn_v2_5_early_init(void *handle)
harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
adev->vcn.harvest_config |= 1 << i;
+ adev->vcn.inst[i].num_enc_rings = 2;
}
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
- AMDGPU_VCN_HARVEST_VCN1))
+ AMDGPU_VCN_HARVEST_VCN1))
/* both instances are harvested, disable the block */
return -ENOENT;
-
- adev->vcn.num_enc_rings = 2;
}
vcn_v2_5_set_dec_ring_funcs(adev);
@@ -145,25 +274,35 @@ static int vcn_v2_5_early_init(void *handle)
vcn_v2_5_set_irq_funcs(adev);
vcn_v2_5_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v2_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
/**
* vcn_v2_5_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v2_5_sw_init(void *handle)
+static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5);
uint32_t *ptr;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+ volatile struct amdgpu_fw_shared *fw_shared;
+
if (adev->vcn.harvest_config & (1 << j))
continue;
/* VCN DEC TRAP */
@@ -173,7 +312,7 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
/* VCN ENC TRAP */
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
if (r)
@@ -185,39 +324,36 @@ static int vcn_v2_5_sw_init(void *handle)
VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
if (r)
return r;
- }
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
+ r = amdgpu_vcn_sw_init(adev, j);
+ if (r)
+ return r;
- amdgpu_vcn_setup_ucode(adev);
+ /* Override the work func */
+ adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ amdgpu_vcn_setup_ucode(adev, j);
- for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
- volatile struct amdgpu_fw_shared *fw_shared;
+ r = amdgpu_vcn_resume(adev, j);
+ if (r)
+ return r;
- if (adev->vcn.harvest_config & (1 << j))
- continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
-
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+
+ adev->vcn.inst[j].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[j].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
ring = &adev->vcn.inst[j].ring_dec;
@@ -237,7 +373,7 @@ static int vcn_v2_5_sw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
ring = &adev->vcn.inst[j].ring_enc[i];
@@ -265,6 +401,9 @@ static int vcn_v2_5_sw_init(void *handle)
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
}
if (amdgpu_sriov_vf(adev)) {
@@ -273,9 +412,6 @@ static int vcn_v2_5_sw_init(void *handle)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
-
r = amdgpu_vcn_ras_sw_init(adev);
if (r)
return r;
@@ -295,14 +431,14 @@ static int vcn_v2_5_sw_init(void *handle)
/**
* vcn_v2_5_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v2_5_sw_fini(void *handle)
+static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
int i, r, idx;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
volatile struct amdgpu_fw_shared *fw_shared;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -319,27 +455,30 @@ static int vcn_v2_5_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
-
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
* vcn_v2_5_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v2_5_hw_init(void *handle)
+static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r = 0;
@@ -366,7 +505,7 @@ static int vcn_v2_5_hw_init(void *handle)
if (r)
return r;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -381,28 +520,30 @@ static int vcn_v2_5_hw_init(void *handle)
/**
* vcn_v2_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v2_5_hw_fini(void *handle)
+static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
RREG32_SOC15(VCN, i, mmUVD_STATUS)))
- vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
- amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
}
return 0;
@@ -411,41 +552,47 @@ static int vcn_v2_5_hw_fini(void *handle)
/**
* vcn_v2_5_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v2_5_suspend(void *handle)
+static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v2_5_hw_fini(adev);
+ r = vcn_v2_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v2_5_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v2_5_resume(void *handle)
+static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v2_5_hw_init(adev);
+ r = vcn_v2_5_hw_init(ip_block);
return r;
}
@@ -453,70 +600,72 @@ static int vcn_v2_5_resume(void *handle)
/**
* vcn_v2_5_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+static void vcn_v2_5_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t size;
uint32_t offset;
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
- /* cache window 0: fw */
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
- offset = 0;
- } else {
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr));
- offset = size;
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- }
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
- /* cache window 1: stack */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
-
- /* cache window 2: context */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
-
- /* non-cache window */
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
- WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
- AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -613,123 +762,124 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/**
* vcn_v2_5_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* UVD disable CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
- data &= ~(UVD_CGC_GATE__SYS_MASK
- | UVD_CGC_GATE__UDEC_MASK
- | UVD_CGC_GATE__MPEG2_MASK
- | UVD_CGC_GATE__REGS_MASK
- | UVD_CGC_GATE__RBC_MASK
- | UVD_CGC_GATE__LMI_MC_MASK
- | UVD_CGC_GATE__LMI_UMC_MASK
- | UVD_CGC_GATE__IDCT_MASK
- | UVD_CGC_GATE__MPRD_MASK
- | UVD_CGC_GATE__MPC_MASK
- | UVD_CGC_GATE__LBSI_MASK
- | UVD_CGC_GATE__LRBBM_MASK
- | UVD_CGC_GATE__UDEC_RE_MASK
- | UVD_CGC_GATE__UDEC_CM_MASK
- | UVD_CGC_GATE__UDEC_IT_MASK
- | UVD_CGC_GATE__UDEC_DB_MASK
- | UVD_CGC_GATE__UDEC_MP_MASK
- | UVD_CGC_GATE__WCB_MASK
- | UVD_CGC_GATE__VCPU_MASK
- | UVD_CGC_GATE__MMSCH_MASK);
-
- WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
-
- SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK
- | UVD_CGC_CTRL__MMSCH_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- /* turn on */
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
- data |= (UVD_SUVD_CGC_GATE__SRE_MASK
- | UVD_SUVD_CGC_GATE__SIT_MASK
- | UVD_SUVD_CGC_GATE__SMP_MASK
- | UVD_SUVD_CGC_GATE__SCM_MASK
- | UVD_SUVD_CGC_GATE__SDB_MASK
- | UVD_SUVD_CGC_GATE__SRE_H264_MASK
- | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SIT_H264_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCM_H264_MASK
- | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SDB_H264_MASK
- | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
- | UVD_SUVD_CGC_GATE__SCLR_MASK
- | UVD_SUVD_CGC_GATE__UVD_SC_MASK
- | UVD_SUVD_CGC_GATE__ENT_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
- | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
- | UVD_SUVD_CGC_GATE__SITE_MASK
- | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
- | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
- | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
- | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
- | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__MMSCH_MASK);
+
+ WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
+
+ SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel, uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -778,68 +928,69 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v2_5_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
+static void vcn_v2_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t data = 0;
- int i;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable UVD CGC */
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
- data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- else
- data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
- data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
- | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
- | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
- | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
- | UVD_CGC_CTRL__SYS_MODE_MASK
- | UVD_CGC_CTRL__UDEC_MODE_MASK
- | UVD_CGC_CTRL__MPEG2_MODE_MASK
- | UVD_CGC_CTRL__REGS_MODE_MASK
- | UVD_CGC_CTRL__RBC_MODE_MASK
- | UVD_CGC_CTRL__LMI_MC_MODE_MASK
- | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
- | UVD_CGC_CTRL__IDCT_MODE_MASK
- | UVD_CGC_CTRL__MPRD_MODE_MASK
- | UVD_CGC_CTRL__MPC_MODE_MASK
- | UVD_CGC_CTRL__LBSI_MODE_MASK
- | UVD_CGC_CTRL__LRBBM_MODE_MASK
- | UVD_CGC_CTRL__WCB_MODE_MASK
- | UVD_CGC_CTRL__VCPU_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
-
- data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
- data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
- | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
- | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
- | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
- | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
- WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return;
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
+static void vcn_v2_6_enable_ras(struct amdgpu_vcn_inst *vinst,
bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0))
@@ -864,8 +1015,10 @@ static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
tmp, 0, indirect);
}
-static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
@@ -883,7 +1036,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v2_5_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -932,7 +1085,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v2_5_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -943,7 +1096,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
- vcn_v2_6_enable_ras(adev, inst_idx, indirect);
+ vcn_v2_6_enable_ras(vinst, indirect);
/* unblock VCPU register access */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -1005,198 +1158,200 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ volatile struct amdgpu_fw_shared *fw_shared =
+ adev->vcn.inst[i].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- /* disable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- /* set uvd status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- }
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
return 0;
- /*SW clock gating */
- vcn_v2_5_disable_clock_gating(adev);
+ /* SW clock gating */
+ vcn_v2_5_disable_clock_gating(vinst);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- tmp &= ~0xff;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- }
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- vcn_v2_5_mc_resume(adev);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ tmp &= ~0xff;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v2_5_mc_resume(vinst);
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (k = 0; k < 10; ++k) {
- uint32_t status;
-
- for (j = 0; j < 100; ++j) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- if (amdgpu_emu_mode == 1)
- msleep(500);
- else
- mdelay(10);
- }
- r = 0;
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (k = 0; k < 10; ++k) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
if (status & 2)
break;
+ if (amdgpu_emu_mode == 1)
+ msleep(500);
+ else
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
- DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- r = -1;
- }
+ mdelay(10);
+ r = -1;
+ }
- if (r) {
- DRM_ERROR("VCN decode not responding, giving up!!!\n");
- return r;
- }
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
- }
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
return 0;
}
@@ -1287,7 +1442,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
/* mc resume*/
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V1_0_INSERT_DIRECT_WT(
@@ -1397,8 +1552,10 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
}
-static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
/* Wait for power status to be 1 */
@@ -1422,80 +1579,93 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v2_5_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(vinst);
+ goto done;
+ }
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- /* block LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- vcn_v2_5_enable_clock_gating(adev);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* enable register anti-hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
- ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
- }
+ vcn_v2_5_enable_clock_gating(vinst);
+ /* enable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
int ret_code = 0;
@@ -1641,8 +1811,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
.insert_start = vcn_v2_0_dec_ring_insert_start,
.insert_end = vcn_v2_0_dec_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -1739,8 +1909,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v2_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
+ .begin_use = vcn_v2_5_ring_begin_use,
+ .end_use = vcn_v2_5_ring_end_use,
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
@@ -1765,30 +1935,31 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) {
adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
adev->vcn.inst[j].ring_enc[i].me = j;
}
}
}
-static bool vcn_v2_5_is_idle(void *handle)
+static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+
ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
return ret;
}
-static int vcn_v2_5_wait_for_idle(void *handle)
+static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1803,45 +1974,50 @@ static int vcn_v2_5_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v2_5_set_clockgating_state(void *handle,
+static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE);
+ int i;
if (amdgpu_sriov_vf(adev))
return 0;
- if (enable) {
- if (!vcn_v2_5_is_idle(handle))
- return -EBUSY;
- vcn_v2_5_enable_clock_gating(adev);
- } else {
- vcn_v2_5_disable_clock_gating(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (!vcn_v2_5_is_idle(ip_block))
+ return -EBUSY;
+ vcn_v2_5_enable_clock_gating(vinst);
+ } else {
+ vcn_v2_5_disable_clock_gating(vinst);
+ }
}
return 0;
}
-static int vcn_v2_5_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = vinst->adev;
int ret;
if (amdgpu_sriov_vf(adev))
return 0;
- if(state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v2_5_stop(adev);
+ ret = vcn_v2_5_stop(vinst);
else
- ret = vcn_v2_5_start(adev);
+ ret = vcn_v2_5_start(vinst);
- if(!ret)
- adev->vcn.cur_state = state;
+ if (!ret)
+ vinst->cur_state = state;
return ret;
}
@@ -1918,17 +2094,17 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
- adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
}
}
-static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v2_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_2_5);
uint32_t inst_off, is_powered;
@@ -1958,9 +2134,9 @@ static void vcn_v2_5_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v2_5_dump_ip_state(void *handle)
+static void vcn_v2_5_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -1989,7 +2165,6 @@ static void vcn_v2_5_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.name = "vcn_v2_5",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -1998,12 +2173,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v2_5_dump_ip_state,
.print_ip_state = vcn_v2_5_print_ip_state,
};
@@ -2011,7 +2182,6 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.name = "vcn_v2_6",
.early_init = vcn_v2_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v2_5_sw_init,
.sw_fini = vcn_v2_5_sw_fini,
.hw_init = vcn_v2_5_hw_init,
@@ -2020,12 +2190,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.resume = vcn_v2_5_resume,
.is_idle = vcn_v2_5_is_idle,
.wait_for_idle = vcn_v2_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
- .set_powergating_state = vcn_v2_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v2_5_dump_ip_state,
.print_ip_state = vcn_v2_5_print_ip_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 65dd68b32280..9fb0d5380589 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -40,6 +40,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48200
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
@@ -105,10 +106,10 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
@@ -116,19 +117,21 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v3_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v3_0_early_init(void *handle)
+static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
adev->vcn.harvest_config = 0;
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+ adev->vcn.inst[i].num_enc_rings = 1;
} else {
if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
@@ -136,45 +139,44 @@ static int vcn_v3_0_early_init(void *handle)
/* both instances are harvested, disable the block */
return -ENOENT;
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
- IP_VERSION(3, 0, 33))
- adev->vcn.num_enc_rings = 0;
- else
- adev->vcn.num_enc_rings = 2;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
+ IP_VERSION(3, 0, 33))
+ adev->vcn.inst[i].num_enc_rings = 0;
+ else
+ adev->vcn.inst[i].num_enc_rings = 2;
+ }
}
vcn_v3_0_set_dec_ring_funcs(adev);
vcn_v3_0_set_enc_ring_funcs(adev);
vcn_v3_0_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ adev->vcn.inst[i].set_pg_state = vcn_v3_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+ return 0;
}
/**
* vcn_v3_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v3_0_sw_init(void *handle)
+static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
int i, j, r;
int vcn_doorbell_index = 0;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0);
uint32_t *ptr;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ struct amdgpu_device *adev = ip_block->adev;
/*
* Note: doorbell assignment is fixed for SRIOV multiple VCN engines
@@ -195,22 +197,32 @@ static int vcn_v3_0_sw_init(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
- adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
- adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
- adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ adev->vcn.inst[i].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
- adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
- adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
- adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
- adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
- adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.inst[i].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
/* VCN DEC TRAP */
@@ -224,7 +236,7 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_dec;
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1);
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
}
@@ -236,7 +248,7 @@ static int vcn_v3_0_sw_init(void *handle)
if (r)
return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
/* VCN ENC TRAP */
@@ -248,7 +260,7 @@ static int vcn_v3_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev)) {
- ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;
+ ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1 + j;
} else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
}
@@ -274,6 +286,9 @@ static int vcn_v3_0_sw_init(void *handle)
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
}
if (amdgpu_sriov_vf(adev)) {
@@ -281,8 +296,6 @@ static int vcn_v3_0_sw_init(void *handle)
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
/* Allocate memory for VCN IP Dump buffer */
ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
@@ -299,13 +312,13 @@ static int vcn_v3_0_sw_init(void *handle)
/**
* vcn_v3_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v3_0_sw_fini(void *handle)
+static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -325,26 +338,30 @@ static int vcn_v3_0_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
- r = amdgpu_vcn_sw_fini(adev);
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
* vcn_v3_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v3_0_hw_init(void *handle)
+static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, j, r;
@@ -370,7 +387,7 @@ static int vcn_v3_0_hw_init(void *handle)
ring->sched.ready = true;
}
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
ring->sched.ready = false;
@@ -398,7 +415,7 @@ static int vcn_v3_0_hw_init(void *handle)
if (r)
return r;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -413,26 +430,28 @@ static int vcn_v3_0_hw_init(void *handle)
/**
* vcn_v3_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v3_0_hw_fini(void *handle)
+static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
- vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
}
@@ -443,41 +462,47 @@ static int vcn_v3_0_hw_fini(void *handle)
/**
* vcn_v3_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v3_0_suspend(void *handle)
+static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v3_0_hw_fini(adev);
+ r = vcn_v3_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v3_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v3_0_resume(void *handle)
+static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v3_0_hw_init(adev);
+ r = vcn_v3_0_hw_init(ip_block);
return r;
}
@@ -485,14 +510,15 @@ static int vcn_v3_0_resume(void *handle)
/**
* vcn_v3_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -540,9 +566,12 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)
AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
}
-static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
- uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4);
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
uint32_t offset;
/* cache window 0: fw */
@@ -636,8 +665,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}
-static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -687,8 +718,10 @@ static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int
WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
}
-static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -735,13 +768,14 @@ static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v3_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Disable clock gating for VCN block
*/
-static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* VCN disable CGC */
@@ -868,9 +902,12 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
- uint8_t sram_sel, int inst_idx, uint8_t indirect)
+static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
/* enable sw clock gating control */
@@ -919,13 +956,14 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v3_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: Pointer to the VCN instance structure
*
* Enable clock gating for VCN block
*/
-static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v3_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
/* enable VCN CGC */
@@ -984,8 +1022,10 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
}
-static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
@@ -1003,7 +1043,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v3_0_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1052,7 +1092,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v3_0_mc_resume_dpg_mode(vinst, indirect);
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
@@ -1133,195 +1173,203 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_start(struct amdgpu_device *adev)
+static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v3_0_start_dpg_mode(vinst, vinst->indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v3_0_disable_static_power_gating(vinst);
- /* disable VCN power gating */
- vcn_v3_0_disable_static_power_gating(adev, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
+ /* SW clock gating */
+ vcn_v3_0_disable_clock_gating(vinst);
- /*SW clock gating */
- vcn_v3_0_disable_clock_gating(adev, i);
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
-
- /* setup mmUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup mmUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup mmUVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v3_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- for (j = 0; j < 10; ++j) {
- uint32_t status;
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- }
- r = 0;
- if (status & 2)
- break;
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v3_0_mc_resume(vinst);
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
+ if (status & 2)
+ break;
mdelay(10);
- r = -1;
}
+ r = 0;
+ if (status & 2)
+ break;
- if (r) {
- DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
- return r;
- }
+ DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ mdelay(10);
+ r = -1;
+ }
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ if (r) {
+ DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
+ return r;
+ }
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- ring = &adev->vcn.inst[i].ring_dec;
- /* force RBC into idle state */
- rb_bufsz = order_base_2(ring->ring_size);
- tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
- /* programm the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
+ ring = &adev->vcn.inst[i].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
- ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
- WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
- lower_32_bits(ring->wptr));
- fw_shared->rb.wptr = lower_32_bits(ring->wptr);
- fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
- IP_VERSION(3, 0, 33)) {
- fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
- fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
-
- fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
- fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
- }
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
+ ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ fw_shared->rb.wptr = lower_32_bits(ring->wptr);
+ fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
+ IP_VERSION(3, 0, 33)) {
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+
+ fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
+ ring = &adev->vcn.inst[i].ring_enc[1];
+ WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
}
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+
return 0;
}
@@ -1375,7 +1423,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1434,7 +1482,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
mmUVD_VCPU_CACHE_SIZE2),
AMDGPU_VCN_CONTEXT_SIZE);
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
ring = &adev->vcn.inst[i].ring_enc[j];
ring->wptr = 0;
rb_addr = ring->gpu_addr;
@@ -1534,12 +1582,14 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
return 0;
}
-static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state);
+ vcn_v3_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
@@ -1562,87 +1612,100 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
+
return 0;
}
-static int vcn_v3_0_stop(struct amdgpu_device *adev)
+static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v3_0_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v3_0_stop_dpg_mode(vinst);
+ goto done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
+ /* clear status */
+ WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
- /* clear status */
- WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
+ /* apply HW clock gating */
+ vcn_v3_0_enable_clock_gating(vinst);
- /* apply HW clock gating */
- vcn_v3_0_enable_clock_gating(adev, i);
+ /* enable VCN power gating */
+ vcn_v3_0_enable_static_power_gating(vinst);
- /* enable VCN power gating */
- vcn_v3_0_enable_static_power_gating(adev, i);
- }
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, mmUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
-static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state)
+static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t reg_data = 0;
@@ -1926,11 +1989,11 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t reg = amdgpu_ib_get_value(ib, i);
uint32_t val = amdgpu_ib_get_value(ib, i + 1);
- if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
+ if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data0, 0)) {
msg_lo = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data1, 0)) {
msg_hi = val;
- } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
+ } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.cmd, 0) &&
val == 0) {
r = vcn_v3_0_dec_msg(p, job,
((u64)msg_hi) << 32 | msg_lo);
@@ -2094,16 +2157,16 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+ for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[j].me = i;
}
}
}
-static bool vcn_v3_0_is_idle(void *handle)
+static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2116,9 +2179,9 @@ static bool vcn_v3_0_is_idle(void *handle)
return ret;
}
-static int vcn_v3_0_wait_for_idle(void *handle)
+static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2134,54 +2197,55 @@ static int vcn_v3_0_wait_for_idle(void *handle)
return ret;
}
-static int vcn_v3_0_set_clockgating_state(void *handle,
+static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v3_0_enable_clock_gating(adev, i);
+ vcn_v3_0_enable_clock_gating(vinst);
} else {
- vcn_v3_0_disable_clock_gating(adev, i);
+ vcn_v3_0_disable_clock_gating(vinst);
}
}
return 0;
}
-static int vcn_v3_0_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v3_0_stop(adev);
+ ret = vcn_v3_0_stop(vinst);
else
- ret = vcn_v3_0_start(adev);
+ ret = vcn_v3_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -2246,14 +2310,14 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
}
}
-static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v3_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_3_0);
uint32_t inst_off;
@@ -2284,9 +2348,9 @@ static void vcn_v3_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v3_0_dump_ip_state(void *handle)
+static void vcn_v3_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -2315,7 +2379,6 @@ static void vcn_v3_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.name = "vcn_v3_0",
.early_init = vcn_v3_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v3_0_sw_init,
.sw_fini = vcn_v3_0_sw_fini,
.hw_init = vcn_v3_0_hw_init,
@@ -2324,12 +2387,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.resume = vcn_v3_0_resume,
.is_idle = vcn_v3_0_is_idle,
.wait_for_idle = vcn_v3_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v3_0_set_clockgating_state,
- .set_powergating_state = vcn_v3_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v3_0_dump_ip_state,
.print_ip_state = vcn_v3_0_print_ip_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 26c6f10a8c8f..b5071f77f78d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -46,6 +46,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
#define VCN_HARVEST_MMSCH 0
@@ -96,25 +97,25 @@ static int amdgpu_ih_clientid_vcns[] = {
static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
/**
* vcn_v4_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v4_0_early_init(void *handle)
+static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
adev->vcn.harvest_config = VCN_HARVEST_MMSCH;
@@ -126,14 +127,23 @@ static int vcn_v4_0_early_init(void *handle)
}
}
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v4_0_set_unified_ring_funcs(adev);
vcn_v4_0_set_irq_funcs(adev);
vcn_v4_0_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
@@ -164,32 +174,32 @@ static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
/**
* vcn_v4_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v4_0_sw_init(void *handle)
+static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0);
uint32_t *ptr;
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
-
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
/* Init instance 0 sched_score to 1, so it's scheduled after other instances */
if (i == 0)
atomic_set(&adev->vcn.inst[i].sched_score, 1);
@@ -211,7 +221,8 @@ static int vcn_v4_0_sw_init(void *handle)
ring = &adev->vcn.inst[i].ring_enc[0];
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev))
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i *
+ (adev->vcn.inst[i].num_enc_rings + 1) + 1;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
ring->vm_hub = AMDGPU_MMHUB0(0);
@@ -223,16 +234,21 @@ static int vcn_v4_0_sw_init(void *handle)
return r;
vcn_v4_0_fw_shared_init(adev, i);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
}
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
r = amdgpu_vcn_ras_sw_init(adev);
if (r)
@@ -247,19 +263,23 @@ static int vcn_v4_0_sw_init(void *handle)
adev->vcn.ip_dump = ptr;
}
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v4_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v4_0_sw_fini(void *handle)
+static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -280,27 +300,35 @@ static int vcn_v4_0_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
- r = amdgpu_vcn_sw_fini(adev);
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
* vcn_v4_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v4_0_hw_init(void *handle)
+static int vcn_v4_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -341,29 +369,32 @@ static int vcn_v4_0_hw_init(void *handle)
/**
* vcn_v4_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v4_0_hw_fini(void *handle)
+static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
- amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
+ amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0);
}
return 0;
@@ -372,41 +403,47 @@ static int vcn_v4_0_hw_fini(void *handle)
/**
* vcn_v4_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v4_0_suspend(void *handle)
+static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v4_0_hw_fini(adev);
+ r = vcn_v4_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v4_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v4_0_resume(void *handle)
+static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v4_0_hw_init(adev);
+ r = vcn_v4_0_hw_init(ip_block);
return r;
}
@@ -414,17 +451,18 @@ static int vcn_v4_0_resume(void *handle)
/**
* vcn_v4_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -474,17 +512,19 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -575,19 +615,21 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
/* VCN global tiling registers */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
- VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
}
/**
* vcn_v4_0_disable_static_power_gating - disable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable static power gating for VCN block
*/
-static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -646,13 +688,14 @@ static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v4_0_enable_static_power_gating - enable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable static power gating for VCN block
*/
-static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -701,13 +744,14 @@ static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int
/**
* vcn_v4_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -812,16 +856,18 @@ static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -869,13 +915,14 @@ static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, u
/**
* vcn_v4_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -925,9 +972,11 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data);
}
-static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
+static void vcn_v4_0_enable_ras(struct amdgpu_vcn_inst *vinst,
bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
@@ -950,14 +999,15 @@ static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
/**
* vcn_v4_0_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
@@ -975,7 +1025,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v4_0_disable_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -1023,7 +1073,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v4_0_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -1035,7 +1085,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
- vcn_v4_0_enable_ras(adev, inst_idx, indirect);
+ vcn_v4_0_enable_ras(vinst, indirect);
/* enable master interrupt */
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
@@ -1072,6 +1122,11 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
return 0;
}
@@ -1079,181 +1134,184 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
/**
* vcn_v4_0_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v4_0_start(struct amdgpu_device *adev)
+static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v4_0_disable_static_power_gating(vinst);
- /* disable VCN power gating */
- vcn_v4_0_disable_static_power_gating(adev, i);
-
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
-
- /*SW clock gating */
- vcn_v4_0_disable_clock_gating(adev, i);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup regUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v4_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (j = 0; j < 10; ++j) {
- uint32_t status;
-
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- if (amdgpu_emu_mode == 1)
- msleep(1);
- }
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v4_0_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- if (amdgpu_emu_mode == 1) {
- r = -1;
- if (status & 2) {
- r = 0;
- break;
- }
- } else {
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
r = 0;
- if (status & 2)
- break;
-
- dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- mdelay(10);
- r = -1;
+ break;
}
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
}
+ }
- if (r) {
- dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
- return r;
- }
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- }
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
return 0;
}
@@ -1334,7 +1392,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
@@ -1509,17 +1567,18 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
/**
* vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state);
+ vcn_v4_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -1534,90 +1593,102 @@ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
}
/**
* vcn_v4_0_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v4_0_stop(struct amdgpu_device *adev)
+static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v4_0_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
- /* clear status */
- WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+ /* apply HW clock gating */
+ vcn_v4_0_enable_clock_gating(vinst);
- /* apply HW clock gating */
- vcn_v4_0_enable_clock_gating(adev, i);
+ /* enable VCN power gating */
+ vcn_v4_0_enable_static_power_gating(vinst);
- /* enable VCN power gating */
- vcn_v4_0_enable_static_power_gating(adev, i);
- }
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
return 0;
}
@@ -1625,15 +1696,16 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
/**
* vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
int ret_code;
@@ -1895,6 +1967,20 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
return 0;
}
+static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ return -EOPNOTSUPP;
+
+ vcn_v4_0_stop(vinst);
+ vcn_v4_0_start(vinst);
+
+ return amdgpu_ring_test_helper(ring);
+}
+
static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1924,6 +2010,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_ring_reset,
};
/**
@@ -1953,13 +2040,13 @@ static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
/**
* vcn_v4_0_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v4_0_is_idle(void *handle)
+static bool vcn_v4_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1975,13 +2062,13 @@ static bool vcn_v4_0_is_idle(void *handle)
/**
* vcn_v4_0_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v4_0_wait_for_idle(void *handle)
+static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -2000,65 +2087,61 @@ static int vcn_v4_0_wait_for_idle(void *handle)
/**
* vcn_v4_0_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v4_0_enable_clock_gating(adev, i);
+ vcn_v4_0_enable_clock_gating(vinst);
} else {
- vcn_v4_0_disable_clock_gating(adev, i);
+ vcn_v4_0_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v4_0_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v4_0_stop(adev);
+ ret = vcn_v4_0_stop(vinst);
else
- ret = vcn_v4_0_start(adev);
+ ret = vcn_v4_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -2150,17 +2233,17 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs;
- adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs;
}
}
-static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v4_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0);
uint32_t inst_off, is_powered;
@@ -2190,9 +2273,9 @@ static void vcn_v4_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v4_0_dump_ip_state(void *handle)
+static void vcn_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -2222,7 +2305,6 @@ static void vcn_v4_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.name = "vcn_v4_0",
.early_init = vcn_v4_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v4_0_sw_init,
.sw_fini = vcn_v4_0_sw_fini,
.hw_init = vcn_v4_0_hw_init,
@@ -2231,12 +2313,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.resume = vcn_v4_0_resume,
.is_idle = vcn_v4_0_is_idle,
.wait_for_idle = vcn_v4_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_set_clockgating_state,
- .set_powergating_state = vcn_v4_0_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v4_0_dump_ip_state,
.print_ip_state = vcn_v4_0_print_ip_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 0fda70336300..5a33140f5723 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -31,6 +31,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
#include "mmsch_v4_0_3.h"
#include "vcn/vcn_4_0_3_offset.h"
@@ -44,6 +45,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
+#define VCN1_AON_SOC_ADDRESS_3_0 0x48000
static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS),
@@ -87,68 +89,101 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_3_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
int inst_idx, bool indirect);
+
+static inline bool vcn_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+ return (adev->vcn.caps & AMDGPU_VCN_CAPS(RRMT_ENABLED)) == 0;
+}
+
/**
* vcn_v4_0_3_early_init - set function pointers
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
*/
-static int vcn_v4_0_3_early_init(void *handle)
+static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v4_0_3_set_unified_ring_funcs(adev);
vcn_v4_0_3_set_irq_funcs(adev);
vcn_v4_0_3_set_ras_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_3_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+ return 0;
}
/**
* vcn_v4_0_3_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v4_0_3_sw_init(void *handle)
+static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r, vcn_inst;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3);
uint32_t *ptr;
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
-
/* VCN DEC TRAP */
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
if (r)
return r;
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
vcn_inst = GET_INST(VCN, i);
@@ -172,23 +207,22 @@ static int vcn_v4_0_3_sw_init(void *handle)
if (r)
return r;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
- fw_shared->sq.is_enabled = true;
+ vcn_v4_0_3_fw_shared_init(adev, i);
- if (amdgpu_vcnfw_log)
- amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
}
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
-
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
r = amdgpu_vcn_ras_sw_init(adev);
if (r) {
@@ -206,19 +240,23 @@ static int vcn_v4_0_3_sw_init(void *handle)
adev->vcn.ip_dump = ptr;
}
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
return 0;
}
/**
* vcn_v4_0_3_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v4_0_3_sw_fini(void *handle)
+static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(&adev->ddev, &idx)) {
@@ -235,29 +273,63 @@ static int vcn_v4_0_3_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
+}
+
+static int vcn_v4_0_3_hw_init_inst(struct amdgpu_vcn_inst *vinst)
+{
+ int vcn_inst;
+ struct amdgpu_device *adev = vinst->adev;
+ struct amdgpu_ring *ring;
+ int inst_idx = vinst->inst;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ if (ring->use_doorbell) {
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst,
+ adev->vcn.inst[inst_idx].aid_id);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+ }
+
+ return 0;
}
/**
* vcn_v4_0_3_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v4_0_3_hw_init(void *handle)
+static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
- int i, r, vcn_inst;
+ struct amdgpu_vcn_inst *vinst;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
r = vcn_v4_0_3_start_sriov(adev);
@@ -272,29 +344,22 @@ static int vcn_v4_0_3_hw_init(void *handle)
ring->sched.ready = true;
}
} else {
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) &
+ 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
ring = &adev->vcn.inst[i].ring_enc[0];
+ vinst = &adev->vcn.inst[i];
+ vcn_v4_0_3_hw_init_inst(vinst);
- if (ring->use_doorbell) {
- adev->nbio.funcs->vcn_doorbell_range(
- adev, ring->use_doorbell,
- (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- 9 * vcn_inst,
- adev->vcn.inst[i].aid_id);
-
- WREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL,
- ring->doorbell_index
- << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- /* Read DB_CTRL to flush the write DB_CTRL command. */
- RREG32_SOC15(
- VCN, GET_INST(VCN, ring->me),
- regVCN_RB1_DB_CTRL);
- }
+ /* Re-init fw_shared when RAS fatal error occurred */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (!fw_shared->sq.is_enabled)
+ vcn_v4_0_3_fw_shared_init(adev, i);
r = amdgpu_ring_test_helper(ring);
if (r)
@@ -308,18 +373,26 @@ static int vcn_v4_0_3_hw_init(void *handle)
/**
* vcn_v4_0_3_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v4_0_3_hw_fini(void *handle)
+static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
+ if (vinst->cur_state != AMD_PG_STATE_GATE)
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
- if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
- vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
return 0;
}
@@ -327,41 +400,47 @@ static int vcn_v4_0_3_hw_fini(void *handle)
/**
* vcn_v4_0_3_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v4_0_3_suspend(void *handle)
+static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v4_0_3_hw_fini(adev);
+ r = vcn_v4_0_3_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
- return r;
+ return 0;
}
/**
* vcn_v4_0_3_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v4_0_3_resume(void *handle)
+static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v4_0_3_hw_init(adev);
+ r = vcn_v4_0_3_hw_init(ip_block);
return r;
}
@@ -369,17 +448,18 @@ static int vcn_v4_0_3_resume(void *handle)
/**
* vcn_v4_0_3_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size, vcn_inst;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
vcn_inst = GET_INST(VCN, inst_idx);
@@ -443,18 +523,20 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
/**
* vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -557,13 +639,14 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
/**
* vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t data;
int vcn_inst;
@@ -650,16 +733,18 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst
/**
* vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -701,13 +786,14 @@ static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t data;
int vcn_inst;
@@ -752,14 +838,16 @@ static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_
/**
* vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared =
adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
@@ -787,7 +875,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
}
/* enable clock gating */
- vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v4_0_3_disable_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -837,7 +925,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v4_0_3_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -889,6 +977,11 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
/*resetting done, fw can check RB ring */
fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
return 0;
}
@@ -929,6 +1022,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
vcn_inst = GET_INST(VCN, i);
+ vcn_v4_0_3_fw_shared_init(adev, vcn_inst);
+
memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
header.version = MMSCH_VERSION;
header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
@@ -941,7 +1036,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
- cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
@@ -1082,189 +1177,185 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
/**
* vcn_v4_0_3_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v4_0_3_start(struct amdgpu_device *adev)
+static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
- int i, j, k, r, vcn_inst;
+ int j, k, r, vcn_inst;
uint32_t tmp;
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ vcn_inst = GET_INST(VCN, i);
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
+ UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
- vcn_inst = GET_INST(VCN, i);
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
- UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
-
- /*SW clock gating */
- vcn_v4_0_3_disable_clock_gating(adev, i);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK,
- ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
- tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup regUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v4_0_3_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* SW clock gating */
+ vcn_v4_0_3_disable_clock_gating(vinst);
- for (j = 0; j < 10; ++j) {
- uint32_t status;
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, vcn_inst,
- regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- }
- r = 0;
- if (status & 2)
- break;
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- DRM_DEV_ERROR(adev->dev,
- "VCN decode not responding, trying to reset the VCPU!!!\n");
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
- regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
- regUVD_VCPU_CNTL),
- 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
+ tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_3_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst,
+ regUVD_STATUS);
+ if (status & 2)
+ break;
mdelay(10);
- r = -1;
}
+ r = 0;
+ if (status & 2)
+ break;
- if (r) {
- DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
- return r;
- }
+ DRM_DEV_ERROR(adev->dev,
+ "VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
+ regUVD_VCPU_CNTL),
+ 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ mdelay(10);
+ r = -1;
+ }
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
+ return r;
+ }
- ring = &adev->vcn.inst[i].ring_enc[0];
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
- upper_32_bits(ring->gpu_addr));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
- ring->ring_size / sizeof(uint32_t));
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- /* resetting ring, fw should not check RB ring */
- tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
- WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
+ upper_32_bits(ring->gpu_addr));
- /* Initialize the ring buffer's read and write pointers */
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
+ ring->ring_size / sizeof(uint32_t));
- tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB_EN_MASK;
- WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting ring, fw should not check RB ring */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
- ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
- fw_shared->sq.queue_mode &=
- cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ fw_shared->sq.queue_mode &=
+ cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
- }
return 0;
}
/**
* vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
int vcn_inst;
@@ -1284,106 +1375,114 @@ static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
return 0;
}
/**
* vcn_v4_0_3_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
+static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- int i, r = 0, vcn_inst;
+ int r = 0, vcn_inst;
uint32_t tmp;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- vcn_inst = GET_INST(VCN, i);
+ vcn_inst = GET_INST(VCN, i);
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v4_0_3_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_3_stop_dpg_mode(vinst);
+ goto Done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
- UVD_STATUS__IDLE, 0x7);
- if (r)
- goto Done;
-
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
- tmp);
- if (r)
- goto Done;
-
- /* stall UMC channel */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
- tmp);
- if (r)
- goto Done;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
+ UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto Done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
+ tmp);
+ if (r)
+ goto Done;
- /* Unblock VCPU Register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* Unblock VCPU Register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* reset LMI UMC/LMI/VCPU */
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ /* reset LMI UMC/LMI/VCPU */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
- /* clear VCN status */
- WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+ /* clear VCN status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
- /* apply HW clock gating */
- vcn_v4_0_3_enable_clock_gating(adev, i);
- }
-Done:
- if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ /* apply HW clock gating */
+ vcn_v4_0_3_enable_clock_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+Done:
return 0;
}
/**
* vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
return 0;
@@ -1427,11 +1526,11 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
regUVD_RB_WPTR);
}
-static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
- uint32_t val, uint32_t mask)
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
{
- /* For VF, only local offsets should be used */
- if (amdgpu_sriov_vf(ring->adev))
+ /* Use normalized offsets when required */
+ if (vcn_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_VCN_REG_OFFSET(reg);
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
@@ -1440,10 +1539,11 @@ static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t
amdgpu_ring_write(ring, val);
}
-static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
{
- /* For VF, only local offsets should be used */
- if (amdgpu_sriov_vf(ring->adev))
+ /* Use normalized offsets when required */
+ if (vcn_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_VCN_REG_OFFSET(reg);
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
@@ -1451,8 +1551,8 @@ static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg
amdgpu_ring_write(ring, val);
}
-static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
@@ -1464,7 +1564,7 @@ static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
lower_32_bits(pd_addr), 0xffffffff);
}
-static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
/* VCN engine access for HDP flush doesn't work when RRMT is enabled.
* This is a workaround to avoid any HDP flush through VCN ring.
@@ -1494,6 +1594,37 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ int r = 0;
+ int vcn_inst;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ if (amdgpu_sriov_vf(ring->adev))
+ return -EOPNOTSUPP;
+
+ if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ return -EOPNOTSUPP;
+
+ vcn_inst = GET_INST(VCN, ring->me);
+ r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst);
+
+ if (r) {
+ DRM_DEV_ERROR(adev->dev, "VCN reset fail : %d\n", r);
+ return r;
+ }
+
+ /* This flag is not set for VF, assumed to be disabled always */
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+ vcn_v4_0_3_hw_init_inst(vinst);
+ vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram);
+ r = amdgpu_ring_test_helper(ring);
+
+ return r;
+}
+
static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1522,6 +1653,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
.emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_3_ring_reset,
};
/**
@@ -1547,13 +1679,13 @@ static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
/**
* vcn_v4_0_3_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v4_0_3_is_idle(void *handle)
+static bool vcn_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1567,13 +1699,13 @@ static bool vcn_v4_0_3_is_idle(void *handle)
/**
* vcn_v4_0_3_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v4_0_3_wait_for_idle(void *handle)
+static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1588,64 +1720,58 @@ static int vcn_v4_0_3_wait_for_idle(void *handle)
/* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_3_set_clockgating_state(void *handle,
+static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = state == AMD_CG_STATE_GATE;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (enable) {
if (RREG32_SOC15(VCN, GET_INST(VCN, i),
regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v4_0_3_enable_clock_gating(adev, i);
+ vcn_v4_0_3_enable_clock_gating(vinst);
} else {
- vcn_v4_0_3_disable_clock_gating(adev, i);
+ vcn_v4_0_3_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v4_0_3_set_powergating_state(void *handle,
- enum amd_powergating_state state)
+static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
/* for SRIOV, guest should not control VCN Power-gating
* MMSCH FW should control Power-gating and clock-gating
* guest should avoid touching CGC and PG
*/
if (amdgpu_sriov_vf(adev)) {
- adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
return 0;
}
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v4_0_3_stop(adev);
+ ret = vcn_v4_0_3_stop(vinst);
else
- ret = vcn_v4_0_3_start(adev);
+ ret = vcn_v4_0_3_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -1711,11 +1837,24 @@ static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static int vcn_v4_0_3_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
.set = vcn_v4_0_3_set_interrupt_state,
.process = vcn_v4_0_3_process_interrupt,
};
+static const struct amdgpu_irq_src_funcs vcn_v4_0_3_ras_irq_funcs = {
+ .set = vcn_v4_0_3_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
/**
* vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
*
@@ -1731,11 +1870,14 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
adev->vcn.inst->irq.num_types++;
}
adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v4_0_3_ras_irq_funcs;
}
-static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3);
uint32_t inst_off, is_powered;
@@ -1765,9 +1907,9 @@ static void vcn_v4_0_3_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v4_0_3_dump_ip_state(void *handle)
+static void vcn_v4_0_3_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off, inst_id;
@@ -1798,7 +1940,6 @@ static void vcn_v4_0_3_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
.name = "vcn_v4_0_3",
.early_init = vcn_v4_0_3_early_init,
- .late_init = NULL,
.sw_init = vcn_v4_0_3_sw_init,
.sw_fini = vcn_v4_0_3_sw_fini,
.hw_init = vcn_v4_0_3_hw_init,
@@ -1807,12 +1948,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
.resume = vcn_v4_0_3_resume,
.is_idle = vcn_v4_0_3_is_idle,
.wait_for_idle = vcn_v4_0_3_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
- .set_powergating_state = vcn_v4_0_3_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v4_0_3_dump_ip_state,
.print_ip_state = vcn_v4_0_3_print_ip_state,
};
@@ -1883,14 +2020,143 @@ static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
}
+static uint32_t vcn_v4_0_3_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V4_0_3_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v4_0_3_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v4_0_3_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
.query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
+ .query_poison_status = vcn_v4_0_3_query_poison_status,
};
+static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int vcn_v4_0_3_err_codes[] = {
+ 14, 15, /* VCN */
+};
+
+static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v4_0_3_err_codes,
+ ARRAY_SIZE(vcn_v4_0_3_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = {
+ .aca_bank_parser = vcn_v4_0_3_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v4_0_3_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v4_0_3_aca_bank_ops,
+};
+
+static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ if (amdgpu_ras_is_supported(adev, ras_block->block) &&
+ adev->vcn.inst->ras_poison_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->vcn.inst->ras_poison_irq, 0);
+ if (r)
+ goto late_fini;
+ }
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v4_0_3_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
.ras_block = {
.hw_ops = &vcn_v4_0_3_ras_hw_ops,
+ .ras_late_init = vcn_v4_0_3_ras_late_init,
},
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
index 0b046114373a..aeab89853a92 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h
@@ -24,6 +24,21 @@
#ifndef __VCN_V4_0_3_H__
#define __VCN_V4_0_3_H__
+enum amdgpu_vcn_v4_0_3_sub_block {
+ AMDGPU_VCN_V4_0_3_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V4_0_3_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block;
+void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
+
+void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val);
+void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr);
+void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring);
+
#endif /* __VCN_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 9d4f5352a62c..16ade84facc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -46,6 +46,7 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000)
+#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000)
#define VCN_HARVEST_MMSCH 0
@@ -95,56 +96,60 @@ static int amdgpu_ih_clientid_vcns[] = {
static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v4_0_5_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v4_0_5_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v4_0_5_early_init(void *handle)
+static int vcn_v4_0_5_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6))
+ adev->vcn.per_inst_fw = true;
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v4_0_5_set_unified_ring_funcs(adev);
vcn_v4_0_5_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v4_0_5_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
/**
* vcn_v4_0_5_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v4_0_5_sw_init(void *handle)
+static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5);
uint32_t *ptr;
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
@@ -152,6 +157,16 @@ static int vcn_v4_0_5_sw_init(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
atomic_set(&adev->vcn.inst[i].sched_score, 0);
/* VCN UNIFIED TRAP */
@@ -170,7 +185,7 @@ static int vcn_v4_0_5_sw_init(void *handle)
ring->use_doorbell = true;
if (amdgpu_sriov_vf(adev))
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
- i * (adev->vcn.num_enc_rings + 1) + 1;
+ i * (adev->vcn.inst[i].num_enc_rings + 1) + 1;
else
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
2 + 8 * i;
@@ -193,19 +208,30 @@ static int vcn_v4_0_5_sw_init(void *handle)
if (amdgpu_sriov_vf(adev))
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+ fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT;
+ fw_shared->drm_key_wa.method =
+ AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING;
+
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
}
+ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
+
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode;
-
/* Allocate memory for VCN IP Dump buffer */
ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
if (!ptr) {
@@ -220,13 +246,13 @@ static int vcn_v4_0_5_sw_init(void *handle)
/**
* vcn_v4_0_5_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v4_0_5_sw_fini(void *handle)
+static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -247,27 +273,31 @@ static int vcn_v4_0_5_sw_fini(void *handle)
if (amdgpu_sriov_vf(adev))
amdgpu_virt_free_mm_table(adev);
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
- r = amdgpu_vcn_sw_fini(adev);
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
* vcn_v4_0_5_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v4_0_5_hw_init(void *handle)
+static int vcn_v4_0_5_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -291,25 +321,28 @@ static int vcn_v4_0_5_hw_init(void *handle)
/**
* vcn_v4_0_5_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v4_0_5_hw_fini(void *handle)
+static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
}
@@ -320,20 +353,24 @@ static int vcn_v4_0_5_hw_fini(void *handle)
/**
* vcn_v4_0_5_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v4_0_5_suspend(void *handle)
+static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v4_0_5_hw_fini(adev);
+ r = vcn_v4_0_5_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
return r;
}
@@ -341,20 +378,22 @@ static int vcn_v4_0_5_suspend(void *handle)
/**
* vcn_v4_0_5_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v4_0_5_resume(void *handle)
+static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v4_0_5_hw_init(adev);
+ r = vcn_v4_0_5_hw_init(ip_block);
return r;
}
@@ -362,17 +401,18 @@ static int vcn_v4_0_5_resume(void *handle)
/**
* vcn_v4_0_5_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_5_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -422,18 +462,20 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_5_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -536,13 +578,14 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
/**
* vcn_v4_0_5_disable_static_power_gating - disable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable static power gating for VCN block
*/
-static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -595,13 +638,14 @@ static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, i
/**
* vcn_v4_0_5_enable_static_power_gating - enable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable static power gating for VCN block
*/
-static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -637,13 +681,14 @@ static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, in
/**
* vcn_v4_0_5_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -748,16 +793,18 @@ static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst
/**
* vcn_v4_0_5_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -805,13 +852,14 @@ static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v4_0_5_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
@@ -864,14 +912,16 @@ static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst)
/**
* vcn_v4_0_5_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
@@ -890,7 +940,7 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
(uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
/* enable clock gating */
- vcn_v4_0_5_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+ vcn_v4_0_5_disable_clock_gating_dpg_mode(vinst, 0, indirect);
/* enable VCPU clock */
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
@@ -938,7 +988,7 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
- vcn_v4_0_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v4_0_5_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -984,6 +1034,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
return 0;
}
@@ -991,182 +1045,184 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
/**
* vcn_v4_0_5_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v4_0_5_start(struct amdgpu_device *adev)
+static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v4_0_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v4_0_5_disable_static_power_gating(vinst);
- /* disable VCN power gating */
- vcn_v4_0_5_disable_static_power_gating(adev, i);
-
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
-
- /*SW clock gating */
- vcn_v4_0_5_disable_clock_gating(adev, i);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- /* setup regUVD_MPC_CNTL */
- tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
- tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
- tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
- WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
-
- /* setup UVD_MPC_SET_MUXA0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUXB0 */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
-
- /* setup UVD_MPC_SET_MUX */
- WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
-
- vcn_v4_0_5_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (j = 0; j < 10; ++j) {
- uint32_t status;
-
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- if (amdgpu_emu_mode == 1)
- msleep(1);
- }
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /* SW clock gating */
+ vcn_v4_0_5_disable_clock_gating(vinst);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- if (amdgpu_emu_mode == 1) {
- r = -1;
- if (status & 2) {
- r = 0;
- break;
- }
- } else {
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup regUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUX */
+ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v4_0_5_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
r = 0;
- if (status & 2)
- break;
-
- dev_err(adev->dev,
- "VCN[%d] is not responding, trying to reset VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- mdelay(10);
- r = -1;
+ break;
}
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
}
+ }
- if (r) {
- dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
- return r;
- }
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- }
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done, otherwise
+ * it may introduce race conditions */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
return 0;
}
@@ -1174,13 +1230,14 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev)
/**
* vcn_v4_0_5_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t tmp;
/* Wait for power status to be 1 */
@@ -1197,106 +1254,119 @@ static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
}
/**
* vcn_v4_0_5_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v4_0_5_stop(struct amdgpu_device *adev)
+static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v4_0_5_stop_dpg_mode(adev, i);
- continue;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v4_0_5_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
+ }
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
- /* clear status */
- WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+ /* apply HW clock gating */
+ vcn_v4_0_5_enable_clock_gating(vinst);
- /* apply HW clock gating */
- vcn_v4_0_5_enable_clock_gating(adev, i);
+ /* enable VCN power gating */
+ vcn_v4_0_5_enable_static_power_gating(vinst);
- /* enable VCN power gating */
- vcn_v4_0_5_enable_static_power_gating(adev, i);
- }
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
/**
* vcn_v4_0_5_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
int ret_code;
@@ -1395,7 +1465,21 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
-static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
+static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ return -EOPNOTSUPP;
+
+ vcn_v4_0_5_stop(vinst);
+ vcn_v4_0_5_start(vinst);
+
+ return amdgpu_ring_test_helper(ring);
+}
+
+static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
@@ -1422,6 +1506,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v4_0_5_ring_reset,
};
/**
@@ -1439,6 +1524,9 @@ static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5))
+ vcn_v4_0_5_unified_ring_vm_funcs.secure_submission_supported = true;
+
adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_5_unified_ring_vm_funcs;
adev->vcn.inst[i].ring_enc[0].me = i;
}
@@ -1447,13 +1535,13 @@ static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev)
/**
* vcn_v4_0_5_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v4_0_5_is_idle(void *handle)
+static bool vcn_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1469,13 +1557,13 @@ static bool vcn_v4_0_5_is_idle(void *handle)
/**
* vcn_v4_0_5_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v4_0_5_wait_for_idle(void *handle)
+static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1494,56 +1582,51 @@ static int vcn_v4_0_5_wait_for_idle(void *handle)
/**
* vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v4_0_5_enable_clock_gating(adev, i);
+ vcn_v4_0_5_enable_clock_gating(vinst);
} else {
- vcn_v4_0_5_disable_clock_gating(adev, i);
+ vcn_v4_0_5_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v4_0_5_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ int ret = 0;
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v4_0_5_stop(adev);
+ ret = vcn_v4_0_5_stop(vinst);
else
- ret = vcn_v4_0_5_start(adev);
+ ret = vcn_v4_0_5_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -1611,14 +1694,14 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v4_0_5_irq_funcs;
}
}
-static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p)
+static void vcn_v4_0_5_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5);
uint32_t inst_off, is_powered;
@@ -1648,9 +1731,9 @@ static void vcn_v4_0_5_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v4_0_5_dump_ip_state(void *handle)
+static void vcn_v4_0_5_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -1680,7 +1763,6 @@ static void vcn_v4_0_5_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
.name = "vcn_v4_0_5",
.early_init = vcn_v4_0_5_early_init,
- .late_init = NULL,
.sw_init = vcn_v4_0_5_sw_init,
.sw_fini = vcn_v4_0_5_sw_fini,
.hw_init = vcn_v4_0_5_hw_init,
@@ -1689,12 +1771,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
.resume = vcn_v4_0_5_resume,
.is_idle = vcn_v4_0_5_is_idle,
.wait_for_idle = vcn_v4_0_5_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_5_set_clockgating_state,
- .set_powergating_state = vcn_v4_0_5_set_powergating_state,
+ .set_powergating_state = vcn_set_powergating_state,
.dump_ip_state = vcn_v4_0_5_dump_ip_state,
.print_ip_state = vcn_v4_0_5_print_ip_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index c305386358b4..f8e3f0b882da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -32,7 +32,7 @@
#include "vcn/vcn_5_0_0_offset.h"
#include "vcn/vcn_5_0_0_sh_mask.h"
-#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
#include "vcn_v5_0_0.h"
#include <drm/drm_drv.h>
@@ -78,57 +78,70 @@ static int amdgpu_ih_clientid_vcns[] = {
static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vcn_v5_0_0_set_powergating_state(void *handle,
- enum amd_powergating_state state);
-static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev,
- int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state);
static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
/**
* vcn_v5_0_0_early_init - set function pointers and load microcode
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Set ring and irq function pointers
* Load microcode from filesystem
*/
-static int vcn_v5_0_0_early_init(void *handle)
+static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
- /* re-use enc ring as unified ring */
- adev->vcn.num_enc_rings = 1;
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
vcn_v5_0_0_set_unified_ring_funcs(adev);
vcn_v5_0_0_set_irq_funcs(adev);
- return amdgpu_vcn_early_init(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v5_0_0_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0);
+ uint32_t *ptr;
+
+ /* Allocate memory for VCN IP Dump buffer */
+ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (!ptr) {
+ DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
+ adev->vcn.ip_dump = NULL;
+ } else {
+ adev->vcn.ip_dump = ptr;
+ }
}
/**
* vcn_v5_0_0_sw_init - sw init for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Load firmware and sw initialization
*/
-static int vcn_v5_0_0_sw_init(void *handle)
+static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block)
{
struct amdgpu_ring *ring;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r;
- uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0);
- uint32_t *ptr;
-
- r = amdgpu_vcn_sw_init(adev);
- if (r)
- return r;
-
- amdgpu_vcn_setup_ucode(adev);
-
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn5_fw_shared *fw_shared;
@@ -136,17 +149,27 @@ static int vcn_v5_0_0_sw_init(void *handle)
if (adev->vcn.harvest_config & (1 << i))
continue;
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
atomic_set(&adev->vcn.inst[i].sched_score, 0);
/* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
if (r)
return r;
/* VCN POISON TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
if (r)
return r;
@@ -168,32 +191,34 @@ static int vcn_v5_0_0_sw_init(void *handle)
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode;
}
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode;
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+ adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
+
+ vcn_v5_0_0_alloc_ip_dump(adev);
+
+ r = amdgpu_vcn_sysfs_reset_mask_init(adev);
+ if (r)
+ return r;
- /* Allocate memory for VCN IP Dump buffer */
- ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL);
- if (!ptr) {
- DRM_ERROR("Failed to allocate memory for VCN IP Dump\n");
- adev->vcn.ip_dump = NULL;
- } else {
- adev->vcn.ip_dump = ptr;
- }
return 0;
}
/**
* vcn_v5_0_0_sw_fini - sw fini for VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* VCN suspend and free up sw allocation
*/
-static int vcn_v5_0_0_sw_fini(void *handle)
+static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, r, idx;
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
@@ -211,27 +236,35 @@ static int vcn_v5_0_0_sw_fini(void *handle)
drm_dev_exit(idx);
}
- r = amdgpu_vcn_suspend(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
- r = amdgpu_vcn_sw_fini(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
kfree(adev->vcn.ip_dump);
- return r;
+ return 0;
}
/**
* vcn_v5_0_0_hw_init - start and test VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Initialize the hardware, boot up the VCPU and do some testing
*/
-static int vcn_v5_0_0_hw_init(void *handle)
+static int vcn_v5_0_0_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
struct amdgpu_ring *ring;
int i, r;
@@ -255,25 +288,28 @@ static int vcn_v5_0_0_hw_init(void *handle)
/**
* vcn_v5_0_0_hw_fini - stop the hardware block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Stop the VCN block, mark ring as not ready any more
*/
-static int vcn_v5_0_0_hw_fini(void *handle)
+static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
+
+ cancel_delayed_work_sync(&vinst->idle_work);
+
if (!amdgpu_sriov_vf(adev)) {
if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
- (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
- RREG32_SOC15(VCN, i, regUVD_STATUS))) {
- vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ (vinst->cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, regUVD_STATUS))) {
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
}
}
}
@@ -284,20 +320,24 @@ static int vcn_v5_0_0_hw_fini(void *handle)
/**
* vcn_v5_0_0_suspend - suspend VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* HW fini and suspend VCN block
*/
-static int vcn_v5_0_0_suspend(void *handle)
+static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = vcn_v5_0_0_hw_fini(adev);
+ r = vcn_v5_0_0_hw_fini(ip_block);
if (r)
return r;
- r = amdgpu_vcn_suspend(adev);
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
return r;
}
@@ -305,20 +345,22 @@ static int vcn_v5_0_0_suspend(void *handle)
/**
* vcn_v5_0_0_resume - resume VCN block
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Resume firmware and hw init VCN block
*/
-static int vcn_v5_0_0_resume(void *handle)
+static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block)
{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
- r = amdgpu_vcn_resume(adev);
- if (r)
- return r;
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
- r = vcn_v5_0_0_hw_init(adev);
+ r = vcn_v5_0_0_hw_init(ip_block);
return r;
}
@@ -326,17 +368,18 @@ static int vcn_v5_0_0_resume(void *handle)
/**
* vcn_v5_0_0_mc_resume - memory controller programming
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst)
+static void vcn_v5_0_0_mc_resume(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -386,18 +429,20 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst)
/**
* vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Let the VCN memory controller know it's offsets with dpg mode
*/
-static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t offset, size;
const struct common_firmware_header *hdr;
- hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
/* cache window 0: fw */
@@ -488,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
/* VCN global tiling registers */
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
- VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+ VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG),
+ adev->gfx.config.gb_addr_config, 0, indirect);
return;
}
@@ -496,13 +542,14 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i
/**
* vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable static power gating for VCN block
*/
-static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data = 0;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -563,13 +610,14 @@ static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, i
/**
* vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable static power gating for VCN block
*/
-static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
uint32_t data;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
@@ -609,12 +657,11 @@ static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, in
/**
* vcn_v5_0_0_disable_clock_gating - disable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Disable clock gating for VCN block
*/
-static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
return;
}
@@ -623,15 +670,15 @@ static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst
/**
* vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
* @sram_sel: sram select
- * @inst_idx: instance number index
* @indirect: indirectly write sram
*
* Disable clock gating for VCN block with dpg mode
*/
-static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
- int inst_idx, uint8_t indirect)
+static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ uint8_t sram_sel,
+ uint8_t indirect)
{
return;
}
@@ -640,12 +687,11 @@ static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev,
/**
* vcn_v5_0_0_enable_clock_gating - enable VCN clock gating
*
- * @adev: amdgpu_device pointer
- * @inst: instance number
+ * @vinst: VCN instance
*
* Enable clock gating for VCN block
*/
-static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
+static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
{
return;
}
@@ -653,14 +699,16 @@ static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
/**
* vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @indirect: indirectly write sram
*
* Start VCN block with dpg mode
*/
-static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
@@ -700,7 +748,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect);
- vcn_v5_0_0_mc_resume_dpg_mode(adev, inst_idx, indirect);
+ vcn_v5_0_0_mc_resume_dpg_mode(vinst, indirect);
tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
@@ -746,159 +794,167 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
return 0;
}
/**
* vcn_v5_0_0_start - VCN start
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Start VCN block
*/
-static int vcn_v5_0_0_start(struct amdgpu_device *adev)
+static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn5_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
- int i, j, k, r;
+ int j, k, r;
+
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, true);
+ amdgpu_dpm_enable_vcn(adev, true, i);
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v5_0_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
- continue;
- }
+ /* disable VCN power gating */
+ vcn_v5_0_0_disable_static_power_gating(vinst);
- /* disable VCN power gating */
- vcn_v5_0_0_disable_static_power_gating(adev, i);
-
- /* set VCN status busy */
- tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
- WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
-
- /* enable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
-
- /* disable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
-
- /* enable LMI MC and UMC channels */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
-
- /* setup regUVD_LMI_CTRL */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
-
- vcn_v5_0_0_mc_resume(adev, i);
-
- /* VCN global tiling registers */
- WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* unblock VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* release VCPU reset to boot */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- for (j = 0; j < 10; ++j) {
- uint32_t status;
-
- for (k = 0; k < 100; ++k) {
- status = RREG32_SOC15(VCN, i, regUVD_STATUS);
- if (status & 2)
- break;
- mdelay(10);
- if (amdgpu_emu_mode == 1)
- msleep(1);
- }
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, i, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- if (amdgpu_emu_mode == 1) {
- r = -1;
- if (status & 2) {
- r = 0;
- break;
- }
- } else {
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_0_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, i, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
r = 0;
- if (status & 2)
- break;
-
- dev_err(adev->dev,
- "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
- mdelay(10);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- mdelay(10);
- r = -1;
+ break;
}
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
}
+ }
- if (r) {
- dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
- return r;
- }
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
- /* enable master interrupt */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the busy bit of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
-
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
-
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
-
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
- fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- }
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
return 0;
}
@@ -906,17 +962,18 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev)
/**
* vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
*
* Stop VCN block with dpg mode
*/
-static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
uint32_t tmp;
- vcn_v5_0_0_pause_dpg_mode(adev, inst_idx, &state);
+ vcn_v5_0_0_pause_dpg_mode(vinst, &state);
/* Wait for power status to be 1 */
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
@@ -930,104 +987,117 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, inst_idx, regUVD_STATUS);
+
return;
}
/**
* vcn_v5_0_0_stop - VCN stop
*
- * @adev: amdgpu_device pointer
+ * @vinst: VCN instance
*
* Stop VCN block
*/
-static int vcn_v5_0_0_stop(struct amdgpu_device *adev)
+static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
volatile struct amdgpu_vcn5_fw_shared *fw_shared;
uint32_t tmp;
- int i, r = 0;
+ int r = 0;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
-
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
-
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- vcn_v5_0_0_stop_dpg_mode(adev, i);
- continue;
- }
-
- /* wait for vcn idle */
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
- if (r)
- return r;
-
- tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__READ_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_MASK |
- UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ if (adev->vcn.harvest_config & (1 << i))
+ return 0;
- /* disable LMI UMC channel */
- tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
- tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
- WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
- tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
- UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
- r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
- if (r)
- return r;
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
- /* block VCPU register access */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
- UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
-
- /* reset VCPU */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
-
- /* disable VCPU clock */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~(UVD_VCPU_CNTL__CLK_EN_MASK));
-
- /* apply soft reset */
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
- tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
- WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
-
- /* clear status */
- WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
-
- /* enable VCN power gating */
- vcn_v5_0_0_enable_static_power_gating(adev, i);
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_0_stop_dpg_mode(vinst);
+ r = 0;
+ goto done;
}
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ goto done;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ goto done;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, i, regUVD_STATUS, 0);
+
+ /* enable VCN power gating */
+ vcn_v5_0_0_enable_static_power_gating(vinst);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, i, regUVD_STATUS);
+
+done:
if (adev->pm.dpm_enabled)
- amdgpu_dpm_enable_uvd(adev, false);
+ amdgpu_dpm_enable_vcn(adev, false, i);
- return 0;
+ return r;
}
/**
* vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode
*
- * @adev: amdgpu_device pointer
- * @inst_idx: instance number index
+ * @vinst: VCN instance
* @new_state: pause state
*
* Pause dpg mode for VCN block
*/
-static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
- struct dpg_pause_state *new_state)
+static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
uint32_t reg_data = 0;
int ret_code;
@@ -1122,6 +1192,20 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
+static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me];
+
+ if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
+ return -EOPNOTSUPP;
+
+ vcn_v5_0_0_stop(vinst);
+ vcn_v5_0_0_start(vinst);
+
+ return amdgpu_ring_test_helper(ring);
+}
+
static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@@ -1149,6 +1233,7 @@ static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = {
.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+ .reset = vcn_v5_0_0_ring_reset,
};
/**
@@ -1174,13 +1259,13 @@ static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev)
/**
* vcn_v5_0_0_is_idle - check VCN block is idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block structure
*
* Check whether VCN block is idle
*/
-static bool vcn_v5_0_0_is_idle(void *handle)
+static bool vcn_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 1;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1196,13 +1281,13 @@ static bool vcn_v5_0_0_is_idle(void *handle)
/**
* vcn_v5_0_0_wait_for_idle - wait for VCN block idle
*
- * @handle: amdgpu_device pointer
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
*
* Wait for VCN block idle
*/
-static int vcn_v5_0_0_wait_for_idle(void *handle)
+static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, ret = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
@@ -1221,56 +1306,51 @@ static int vcn_v5_0_0_wait_for_idle(void *handle)
/**
* vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state
*
- * @handle: amdgpu_device pointer
+ * @ip_block: amdgpu_ip_block pointer
* @state: clock gating state
*
* Set VCN block clockgating state
*/
-static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state)
+static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
if (adev->vcn.harvest_config & (1 << i))
continue;
if (enable) {
if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE)
return -EBUSY;
- vcn_v5_0_0_enable_clock_gating(adev, i);
+ vcn_v5_0_0_enable_clock_gating(vinst);
} else {
- vcn_v5_0_0_disable_clock_gating(adev, i);
+ vcn_v5_0_0_disable_clock_gating(vinst);
}
}
return 0;
}
-/**
- * vcn_v5_0_0_set_powergating_state - set VCN block powergating state
- *
- * @handle: amdgpu_device pointer
- * @state: power gating state
- *
- * Set VCN block powergating state
- */
-static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state)
+static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int ret;
+ int ret = 0;
- if (state == adev->vcn.cur_state)
+ if (state == vinst->cur_state)
return 0;
if (state == AMD_PG_STATE_GATE)
- ret = vcn_v5_0_0_stop(adev);
+ ret = vcn_v5_0_0_stop(vinst);
else
- ret = vcn_v5_0_0_start(adev);
+ ret = vcn_v5_0_0_start(vinst);
if (!ret)
- adev->vcn.cur_state = state;
+ vinst->cur_state = state;
return ret;
}
@@ -1304,10 +1384,10 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp
DRM_DEBUG("IH: VCN TRAP\n");
switch (entry->src_id) {
- case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
break;
- case VCN_4_0__SRCID_UVD_POISON:
+ case VCN_5_0__SRCID_UVD_POISON:
amdgpu_vcn_process_poison_irq(adev, source, entry);
break;
default:
@@ -1338,14 +1418,15 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v5_0_0_irq_funcs;
}
}
-static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p)
+void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block,
+ struct drm_printer *p)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0);
uint32_t inst_off, is_powered;
@@ -1375,9 +1456,9 @@ static void vcn_v5_0_print_ip_state(void *handle, struct drm_printer *p)
}
}
-static void vcn_v5_0_dump_ip_state(void *handle)
+void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int i, j;
bool is_powered;
uint32_t inst_off;
@@ -1406,7 +1487,6 @@ static void vcn_v5_0_dump_ip_state(void *handle)
static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = {
.name = "vcn_v5_0_0",
.early_init = vcn_v5_0_0_early_init,
- .late_init = NULL,
.sw_init = vcn_v5_0_0_sw_init,
.sw_fini = vcn_v5_0_0_sw_fini,
.hw_init = vcn_v5_0_0_hw_init,
@@ -1415,14 +1495,10 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = {
.resume = vcn_v5_0_0_resume,
.is_idle = vcn_v5_0_0_is_idle,
.wait_for_idle = vcn_v5_0_0_wait_for_idle,
- .check_soft_reset = NULL,
- .pre_soft_reset = NULL,
- .soft_reset = NULL,
- .post_soft_reset = NULL,
.set_clockgating_state = vcn_v5_0_0_set_clockgating_state,
- .set_powergating_state = vcn_v5_0_0_set_powergating_state,
- .dump_ip_state = vcn_v5_0_dump_ip_state,
- .print_ip_state = vcn_v5_0_print_ip_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v5_0_0_dump_ip_state,
+ .print_ip_state = vcn_v5_0_0_print_ip_state,
};
const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
index 51bbccd4360f..b8927652bc50 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h
@@ -32,6 +32,11 @@
#define VCN_VID_IP_ADDRESS 0x0
#define VCN_AON_IP_ADDRESS 0x30000
+void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev);
+void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block,
+ struct drm_printer *p);
+void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block);
+
extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block;
#endif /* __VCN_V5_0_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
new file mode 100644
index 000000000000..cdefd7fcb0da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c
@@ -0,0 +1,1624 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "soc15_hw_ip.h"
+#include "vcn_v2_0.h"
+#include "vcn_v4_0_3.h"
+#include "mmsch_v5_0.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+#include "vcn_v5_0_0.h"
+#include "vcn_v5_0_1.h"
+
+#include <drm/drm_drv.h>
+
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state);
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev);
+/**
+ * vcn_v5_0_1_early_init - set function pointers and load microcode
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ * Load microcode from filesystem
+ */
+static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ /* re-use enc ring as unified ring */
+ adev->vcn.inst[i].num_enc_rings = 1;
+
+ vcn_v5_0_1_set_unified_ring_funcs(adev);
+ vcn_v5_0_1_set_irq_funcs(adev);
+ vcn_v5_0_1_set_ras_funcs(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].set_pg_state = vcn_v5_0_1_set_pg_state;
+
+ r = amdgpu_vcn_early_init(adev, i);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+
+ if (fw_shared->sq.is_enabled)
+ return;
+ fw_shared->present_flag_0 =
+ cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+}
+
+/**
+ * vcn_v5_0_1_sw_init - sw init for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ /* VCN UNIFIED TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
+ if (r)
+ return r;
+
+ /* VCN POISON TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst->ras_poison_irq);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ r = amdgpu_vcn_sw_init(adev, i);
+ if (r)
+ return r;
+
+ amdgpu_vcn_setup_ucode(adev, i);
+
+ r = amdgpu_vcn_resume(adev, i);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ if (!amdgpu_sriov_vf(adev))
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst;
+ else
+ ring->doorbell_index =
+ (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 32 * vcn_inst;
+
+ ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
+ sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
+
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
+
+ vcn_v5_0_1_fw_shared_init(adev, i);
+ }
+
+ /* TODO: Add queue reset mask when FW fully supports it */
+ adev->vcn.supported_reset =
+ amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
+ if (r)
+ return r;
+ }
+
+ vcn_v5_0_0_alloc_ip_dump(adev);
+
+ return amdgpu_vcn_sysfs_reset_mask_init(adev);
+}
+
+/**
+ * vcn_v5_0_1_sw_fini - sw fini for VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, r, idx;
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
+
+ drm_dev_exit(idx);
+ }
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(adev, i);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_sw_fini(adev, i);
+ if (r)
+ return r;
+ }
+
+ amdgpu_vcn_sysfs_reset_mask_fini(adev);
+
+ kfree(adev->vcn.ip_dump);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_init - start and test VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ struct amdgpu_ring *ring;
+ int i, r, vcn_inst;
+
+ if (amdgpu_sriov_vf(adev)) {
+ r = vcn_v5_0_1_start_sriov(adev);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+ ring->wptr_old = 0;
+ vcn_v5_0_1_unified_ring_set_wptr(ring);
+ ring->sched.ready = true;
+ }
+ } else {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100)
+ adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED);
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ vcn_inst = GET_INST(VCN, i);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ if (ring->use_doorbell)
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ 11 * vcn_inst),
+ adev->vcn.inst[i].aid_id);
+
+ /* Re-init fw_shared, if required */
+ vcn_v5_0_1_fw_shared_init(adev, i);
+
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work);
+ if (vinst->cur_state != AMD_PG_STATE_GATE)
+ vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
+ }
+
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+ amdgpu_irq_put(adev, &adev->vcn.inst->ras_poison_irq, 0);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_suspend - suspend VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ r = vcn_v5_0_1_hw_fini(ip_block);
+ if (r)
+ return r;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ r = amdgpu_vcn_suspend(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_resume - resume VCN block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int r, i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (amdgpu_in_reset(adev))
+ vinst->cur_state = AMD_PG_STATE_GATE;
+
+ r = amdgpu_vcn_resume(ip_block->adev, i);
+ if (r)
+ return r;
+ }
+
+ r = vcn_v5_0_1_hw_init(ip_block);
+
+ return r;
+}
+
+/**
+ * vcn_v5_0_1_mc_resume - memory controller programming
+ *
+ * @vinst: VCN instance
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst = vinst->inst;
+ uint32_t offset, size, vcn_inst;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ vcn_inst = GET_INST(VCN, inst);
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr));
+ offset = size;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ /* non-cache window */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
+}
+
+/**
+ * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Let the VCN memory controller know it's offsets with dpg mode
+ */
+static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t offset, size;
+ const struct common_firmware_header *hdr;
+
+ hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data;
+ size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
+ inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating
+ *
+ * @vinst: VCN instance
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
+{
+}
+
+/**
+ * vcn_v5_0_1_pause_dpg_mode - VCN pause with dpg mode
+ *
+ * @vinst: VCN instance
+ * @new_state: pause state
+ *
+ * Pause dpg mode for VCN block
+ */
+static int vcn_v5_0_1_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ uint32_t reg_data = 0;
+ int vcn_inst;
+
+ vcn_inst = GET_INST(VCN, vinst->inst);
+
+ /* pause/unpause if state is changed */
+ if (vinst->pause_state.fw_based != new_state->fw_based) {
+ DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d %s\n",
+ vinst->pause_state.fw_based, new_state->fw_based,
+ new_state->fw_based ? "VCN_DPG_STATE__PAUSE" : "VCN_DPG_STATE__UNPAUSE");
+ reg_data = RREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+ } else {
+ /* unpause DPG, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_DPG_PAUSE, reg_data);
+ }
+ vinst->pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+
+/**
+ * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode
+ *
+ * @vinst: VCN instance
+ * @indirect: indirectly write sram
+ *
+ * Start VCN block with dpg mode
+ */
+static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
+ bool indirect)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared =
+ adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ struct amdgpu_ring *ring;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__PAUSE};
+ int vcn_inst;
+ uint32_t tmp;
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
+
+ if (indirect) {
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
+ (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+ /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */
+ WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF,
+ adev->vcn.inst[inst_idx].aid_id, 0, true);
+ }
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
+
+ vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
+ VCN, 0, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
+
+ /* resetting ring, fw should not check RB ring */
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+
+ /* Pause dpg */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ /* resetting done, fw can check RB ring */
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ return 0;
+}
+
+static int vcn_v5_0_1_start_sriov(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+ struct amdgpu_ring *ring_enc;
+ uint64_t cache_addr;
+ uint64_t rb_enc_addr;
+ uint64_t ctx_addr;
+ uint32_t param, resp, expected;
+ uint32_t offset, cache_size;
+ uint32_t tmp, timeout;
+
+ struct amdgpu_mm_table *table = &adev->virt.mm_table;
+ uint32_t *table_loc;
+ uint32_t table_size;
+ uint32_t size, size_dw;
+ uint32_t init_status;
+ uint32_t enabled_vcn;
+
+ struct mmsch_v5_0_cmd_direct_write
+ direct_wt = { {0} };
+ struct mmsch_v5_0_cmd_direct_read_modify_write
+ direct_rd_mod_wt = { {0} };
+ struct mmsch_v5_0_cmd_end end = { {0} };
+ struct mmsch_v5_0_init_header header;
+
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
+
+ direct_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type =
+ MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ vcn_inst = GET_INST(VCN, i);
+
+ vcn_v5_0_1_fw_shared_init(adev, vcn_inst);
+
+ memset(&header, 0, sizeof(struct mmsch_v5_0_init_header));
+ header.version = MMSCH_VERSION;
+ header.total_size = sizeof(struct mmsch_v5_0_init_header) >> 2;
+
+ table_loc = (uint32_t *)table->cpu_addr;
+ table_loc += header.total_size;
+
+ table_size = 0;
+
+ MMSCH_V5_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+
+ offset = 0;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = cache_size;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE0),
+ cache_size);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET1), 0);
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
+
+ cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE;
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_OFFSET2), 0);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
+
+ fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
+ rb_setup = &fw_shared->rb_setup;
+
+ ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
+ ring_enc->wptr = 0;
+ rb_enc_addr = ring_enc->gpu_addr;
+
+ rb_setup->is_rb_enabled_flags |= RB_ENABLED;
+ rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
+ rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
+ rb_setup->rb_size = ring_enc->ring_size / 4;
+ fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
+
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
+ MMSCH_V5_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
+ regUVD_VCPU_NONCACHE_SIZE0),
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ MMSCH_V5_0_INSERT_END();
+
+ header.vcn0.init_status = 0;
+ header.vcn0.table_offset = header.total_size;
+ header.vcn0.table_size = table_size;
+ header.total_size += table_size;
+
+ /* Send init table to mmsch */
+ size = sizeof(struct mmsch_v5_0_init_header);
+ table_loc = (uint32_t *)table->cpu_addr;
+ memcpy((void *)table_loc, &header, size);
+
+ ctx_addr = table->gpu_addr;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
+ tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
+
+ size = header.total_size;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
+
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
+
+ param = 0x00000001;
+ WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
+ tmp = 0;
+ timeout = 1000;
+ resp = 0;
+ expected = MMSCH_VF_MAILBOX_RESP__OK;
+ while (resp != expected) {
+ resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
+ if (resp != 0)
+ break;
+
+ udelay(10);
+ tmp = tmp + 10;
+ if (tmp >= timeout) {
+ DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
+ " waiting for regMMSCH_VF_MAILBOX_RESP "\
+ "(expected=0x%08x, readback=0x%08x)\n",
+ tmp, expected, resp);
+ return -EBUSY;
+ }
+ }
+
+ enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
+ init_status = ((struct mmsch_v5_0_init_header *)(table_loc))->vcn0.init_status;
+ if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
+ && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
+ DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
+ "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_start - VCN start
+ *
+ * @vinst: VCN instance
+ *
+ * Start VCN block
+ */
+static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ struct amdgpu_ring *ring;
+ uint32_t tmp;
+ int j, k, r, vcn_inst;
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram);
+
+ vcn_inst = GET_INST(VCN, i);
+
+ /* set VCN status busy */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* setup regUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ vcn_v5_0_1_mc_resume(vinst);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+
+ /* unblock VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ for (j = 0; j < 10; ++j) {
+ uint32_t status;
+
+ for (k = 0; k < 100; ++k) {
+ status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(100);
+ if (amdgpu_emu_mode == 1)
+ msleep(20);
+ }
+
+ if (amdgpu_emu_mode == 1) {
+ r = -1;
+ if (status & 2) {
+ r = 0;
+ break;
+ }
+ } else {
+ r = 0;
+ if (status & 2)
+ break;
+
+ dev_err(adev->dev,
+ "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ mdelay(10);
+ r = -1;
+ }
+ }
+
+ if (r) {
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
+
+ /* Read DB_CTRL to flush the write DB_CTRL command. */
+ RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL);
+
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+
+ tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block with dpg mode
+ */
+static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int inst_idx = vinst->inst;
+ uint32_t tmp;
+ int vcn_inst;
+ struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
+
+ vcn_inst = GET_INST(VCN, inst_idx);
+
+ /* Unpause dpg */
+ vcn_v5_0_1_pause_dpg_mode(vinst, &state);
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+}
+
+/**
+ * vcn_v5_0_1_stop - VCN stop
+ *
+ * @vinst: VCN instance
+ *
+ * Stop VCN block
+ */
+static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int i = vinst->inst;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
+ uint32_t tmp;
+ int r = 0, vcn_inst;
+
+ vcn_inst = GET_INST(VCN, i);
+
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ vcn_v5_0_1_stop_dpg_mode(vinst);
+ return 0;
+ }
+
+ /* wait for vcn idle */
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* disable LMI UMC channel */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp);
+ if (r)
+ return r;
+
+ /* block VCPU register access */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
+ UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* apply soft reset */
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
+ tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
+
+ /* clear status */
+ WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
+
+ /* Keeping one read-back to ensure all register writes are done,
+ * otherwise it may introduce race conditions.
+ */
+ RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS);
+
+ return 0;
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified read pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware unified write pointer
+ */
+static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell)
+ return *ring->wptr_cpu_addr;
+ else
+ return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR);
+}
+
+/**
+ * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ if (ring->use_doorbell) {
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
+ lower_32_bits(ring->wptr));
+ }
+}
+
+static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .get_rptr = vcn_v5_0_1_unified_ring_get_rptr,
+ .get_wptr = vcn_v5_0_1_unified_ring_get_wptr,
+ .set_wptr = vcn_v5_0_1_unified_ring_set_wptr,
+ .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 +
+ 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush,
+ .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+/**
+ * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set unified ring functions
+ */
+static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev)
+{
+ int i, vcn_inst;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
+ vcn_inst = GET_INST(VCN, i);
+ adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid;
+ }
+}
+
+/**
+ * vcn_v5_0_1_is_idle - check VCN block is idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block structure
+ *
+ * Check whether VCN block is idle
+ */
+static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 1;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE);
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_wait_for_idle - wait for VCN block idle
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Wait for VCN block idle
+ */
+static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ * @state: clock gating state
+ *
+ * Set VCN block clockgating state
+ */
+static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = ip_block->adev;
+ bool enable = state == AMD_CG_STATE_GATE;
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
+
+ if (enable) {
+ if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE)
+ return -EBUSY;
+ vcn_v5_0_1_enable_clock_gating(vinst);
+ } else {
+ vcn_v5_0_1_disable_clock_gating(vinst);
+ }
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = vinst->adev;
+ int ret = 0;
+
+ /* for SRIOV, guest should not control VCN Power-gating
+ * MMSCH FW should control Power-gating and clock-gating
+ * guest should avoid touching CGC and PG
+ */
+ if (amdgpu_sriov_vf(adev)) {
+ vinst->cur_state = AMD_PG_STATE_UNGATE;
+ return 0;
+ }
+
+ if (state == vinst->cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v5_0_1_stop(vinst);
+ else
+ ret = vcn_v5_0_1_start(vinst);
+
+ if (!ret)
+ vinst->cur_state = state;
+
+ return ret;
+}
+
+/**
+ * vcn_v5_0_1_process_interrupt - process VCN block interrupt
+ *
+ * @adev: amdgpu_device pointer
+ * @source: interrupt sources
+ * @entry: interrupt entry from clients and sources
+ *
+ * Process VCN block interrupt
+ */
+static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ uint32_t i, inst;
+
+ i = node_id_to_phys_map[entry->node_id];
+
+ DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
+ if (adev->vcn.inst[inst].aid_id == i)
+ break;
+ if (inst >= adev->vcn.num_vcn_inst) {
+ dev_WARN_ONCE(adev->dev, 1,
+ "Interrupt received for unknown VCN instance %d",
+ entry->node_id);
+ return 0;
+ }
+
+ switch (entry->src_id) {
+ case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
+ break;
+ default:
+ DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vcn_v5_0_1_set_ras_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = {
+ .process = vcn_v5_0_1_process_interrupt,
+};
+
+static const struct amdgpu_irq_src_funcs vcn_v5_0_1_ras_irq_funcs = {
+ .set = vcn_v5_0_1_set_ras_interrupt_state,
+ .process = amdgpu_vcn_process_poison_irq,
+};
+
+
+/**
+ * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set VCN block interrupt irq functions
+ */
+static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
+ adev->vcn.inst->irq.num_types++;
+
+ adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs;
+
+ adev->vcn.inst->ras_poison_irq.num_types = 1;
+ adev->vcn.inst->ras_poison_irq.funcs = &vcn_v5_0_1_ras_irq_funcs;
+
+}
+
+static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = {
+ .name = "vcn_v5_0_1",
+ .early_init = vcn_v5_0_1_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v5_0_1_sw_init,
+ .sw_fini = vcn_v5_0_1_sw_fini,
+ .hw_init = vcn_v5_0_1_hw_init,
+ .hw_fini = vcn_v5_0_1_hw_fini,
+ .suspend = vcn_v5_0_1_suspend,
+ .resume = vcn_v5_0_1_resume,
+ .is_idle = vcn_v5_0_1_is_idle,
+ .wait_for_idle = vcn_v5_0_1_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v5_0_1_set_clockgating_state,
+ .set_powergating_state = vcn_set_powergating_state,
+ .dump_ip_state = vcn_v5_0_0_dump_ip_state,
+ .print_ip_state = vcn_v5_0_0_print_ip_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 5,
+ .minor = 0,
+ .rev = 1,
+ .funcs = &vcn_v5_0_1_ip_funcs,
+};
+
+static uint32_t vcn_v5_0_1_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V5_0_1_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v5_0_1_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v5_0_1_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+static const struct amdgpu_ras_block_hw_ops vcn_v5_0_1_ras_hw_ops = {
+ .query_poison_status = vcn_v5_0_1_query_poison_status,
+};
+
+static int vcn_v5_0_1_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ struct aca_bank_info info;
+ u64 misc0;
+ int ret;
+
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ bank->aca_err_type = ACA_ERROR_TYPE_UE;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+/* reference to smu driver if header file */
+static int vcn_v5_0_1_err_codes[] = {
+ 14, 15, /* VCN */
+};
+
+static bool vcn_v5_0_1_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ vcn_v5_0_1_err_codes,
+ ARRAY_SIZE(vcn_v5_0_1_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops vcn_v5_0_1_aca_bank_ops = {
+ .aca_bank_parser = vcn_v5_0_1_aca_bank_parser,
+ .aca_bank_is_valid = vcn_v5_0_1_aca_bank_is_valid,
+};
+
+static const struct aca_info vcn_v5_0_1_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &vcn_v5_0_1_aca_bank_ops,
+};
+
+static int vcn_v5_0_1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN,
+ &vcn_v5_0_1_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
+static struct amdgpu_vcn_ras vcn_v5_0_1_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v5_0_1_ras_hw_ops,
+ .ras_late_init = vcn_v5_0_1_ras_late_init,
+ },
+};
+
+static void vcn_v5_0_1_set_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ras = &vcn_v5_0_1_ras;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
new file mode 100644
index 000000000000..b72e4da68317
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_v5_0_1_H__
+#define __VCN_v5_0_1_H__
+
+#define regVCN_RRMT_CNTL 0x0940
+#define regVCN_RRMT_CNTL_BASE_IDX 1
+
+
+enum amdgpu_vcn_v5_0_1_sub_block {
+ AMDGPU_VCN_V5_0_1_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V5_0_1_MAX_SUB_BLOCK,
+};
+
+extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block;
+
+#endif /* __VCN_v5_0_1_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index bf68e18e3824..eb16916c6473 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -472,18 +471,18 @@ static void vega10_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega10_ih_self_irq_funcs;
}
-static int vega10_ih_early_init(void *handle)
+static int vega10_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_set_interrupt_funcs(adev);
vega10_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega10_ih_sw_init(void *handle)
+static int vega10_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int r;
r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0,
@@ -525,58 +524,50 @@ static int vega10_ih_sw_init(void *handle)
return r;
}
-static int vega10_ih_sw_fini(void *handle)
+static int vega10_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega10_ih_hw_init(void *handle)
+static int vega10_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_irq_init(adev);
+ return vega10_ih_irq_init(ip_block->adev);
}
-static int vega10_ih_hw_fini(void *handle)
+static int vega10_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega10_ih_irq_disable(adev);
+ vega10_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega10_ih_suspend(void *handle)
+static int vega10_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_fini(adev);
+ return vega10_ih_hw_fini(ip_block);
}
-static int vega10_ih_resume(void *handle)
+static int vega10_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega10_ih_hw_init(adev);
+ return vega10_ih_hw_init(ip_block);
}
-static bool vega10_ih_is_idle(void *handle)
+static bool vega10_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega10_ih_wait_for_idle(void *handle)
+static int vega10_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega10_ih_soft_reset(void *handle)
+static int vega10_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -613,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega10_ih_set_clockgating_state(void *handle,
+static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega10_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -624,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle,
}
-static int vega10_ih_set_powergating_state(void *handle,
+static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -633,7 +624,6 @@ static int vega10_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega10_ih_ip_funcs = {
.name = "vega10_ih",
.early_init = vega10_ih_early_init,
- .late_init = NULL,
.sw_init = vega10_ih_sw_init,
.sw_fini = vega10_ih_sw_fini,
.hw_init = vega10_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index ac439f0565e3..85846fd08ce4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -114,6 +114,33 @@ static int vega20_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
+ if (enable) {
+ /* Unset the CLEAR_OVERFLOW bit to make sure the next step
+ * is switching the bit from 0 to 1
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Clear RB_OVERFLOW bit */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
+ if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp))
+ return -ETIMEDOUT;
+ } else {
+ WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
+ }
+
+ /* Unset the CLEAR_OVERFLOW bit immediately so new overflows
+ * can be detected.
+ */
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
+ }
+
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -323,6 +350,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
if (ret)
return ret;
}
+ ih[i]->overflow = false;
}
if (!amdgpu_sriov_vf(adev))
@@ -339,6 +367,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
/* Enable IH Retry CAM */
if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) ||
amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) ||
+ amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) ||
amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5))
WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
ENABLE, 1);
@@ -409,16 +438,18 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
goto out;
- wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ if (!amdgpu_sriov_vf(adev))
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+ else
+ ih->overflow = true;
/* When a ring buffer overflow happen start parsing interrupt
* from the last not overwritten vector (wptr + 32). Hopefully
* this should allow us to catchup.
*/
tmp = (wptr + 32) & ih->ptr_mask;
- dev_warn(adev->dev, "IH ring buffer overflow "
- "(0x%08X, 0x%08X, 0x%08X)\n",
- wptr, ih->rptr, tmp);
+ dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
+ amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp);
ih->rptr = tmp;
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
@@ -526,18 +557,18 @@ static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev)
adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs;
}
-static int vega20_ih_early_init(void *handle)
+static int vega20_ih_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_set_interrupt_funcs(adev);
vega20_ih_set_self_irq_funcs(adev);
return 0;
}
-static int vega20_ih_sw_init(void *handle)
+static int vega20_ih_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
bool use_bus_addr = true;
int r;
@@ -586,19 +617,19 @@ static int vega20_ih_sw_init(void *handle)
return r;
}
-static int vega20_ih_sw_fini(void *handle)
+static int vega20_ih_sw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
amdgpu_irq_fini_sw(adev);
return 0;
}
-static int vega20_ih_hw_init(void *handle)
+static int vega20_ih_hw_init(struct amdgpu_ip_block *ip_block)
{
int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
r = vega20_ih_irq_init(adev);
if (r)
@@ -607,42 +638,36 @@ static int vega20_ih_hw_init(void *handle)
return 0;
}
-static int vega20_ih_hw_fini(void *handle)
+static int vega20_ih_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- vega20_ih_irq_disable(adev);
+ vega20_ih_irq_disable(ip_block->adev);
return 0;
}
-static int vega20_ih_suspend(void *handle)
+static int vega20_ih_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_fini(adev);
+ return vega20_ih_hw_fini(ip_block);
}
-static int vega20_ih_resume(void *handle)
+static int vega20_ih_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vega20_ih_hw_init(adev);
+ return vega20_ih_hw_init(ip_block);
}
-static bool vega20_ih_is_idle(void *handle)
+static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return true;
}
-static int vega20_ih_wait_for_idle(void *handle)
+static int vega20_ih_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
/* todo */
return -ETIMEDOUT;
}
-static int vega20_ih_soft_reset(void *handle)
+static int vega20_ih_soft_reset(struct amdgpu_ip_block *ip_block)
{
/* todo */
@@ -676,10 +701,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev,
}
}
-static int vega20_ih_set_clockgating_state(void *handle,
+static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
vega20_ih_update_clockgating_state(adev,
state == AMD_CG_STATE_GATE);
@@ -687,7 +712,7 @@ static int vega20_ih_set_clockgating_state(void *handle,
}
-static int vega20_ih_set_powergating_state(void *handle,
+static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
@@ -696,7 +721,6 @@ static int vega20_ih_set_powergating_state(void *handle,
const struct amd_ip_funcs vega20_ih_ip_funcs = {
.name = "vega20_ih",
.early_init = vega20_ih_early_init,
- .late_init = NULL,
.sw_init = vega20_ih_sw_init,
.sw_fini = vega20_ih_sw_fini,
.hw_init = vega20_ih_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index d39c670f6220..9b3510e53112 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -136,15 +136,15 @@ static const struct amdgpu_video_codec_info polaris_video_codecs_encode_array[]
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
.max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
+ .max_height = 4096,
+ .max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
};
@@ -167,16 +167,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -188,9 +188,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
};
@@ -206,16 +206,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
{
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 3,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 5,
},
{
@@ -227,9 +227,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
+ .max_width = 1920,
+ .max_height = 1088,
+ .max_pixels_per_frame = 1920 * 1088,
.max_level = 4,
},
{
@@ -1455,9 +1455,9 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
#define CZ_REV_BRISTOL(rev) \
((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6))
-static int vi_common_early_init(void *handle)
+static int vi_common_early_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (adev->flags & AMD_IS_APU) {
adev->smc_rreg = &cz_smc_rreg;
@@ -1679,9 +1679,9 @@ static int vi_common_early_init(void *handle)
return 0;
}
-static int vi_common_late_init(void *handle)
+static int vi_common_late_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_get_irq(adev);
@@ -1689,9 +1689,9 @@ static int vi_common_late_init(void *handle)
return 0;
}
-static int vi_common_sw_init(void *handle)
+static int vi_common_sw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
xgpu_vi_mailbox_add_irq_id(adev);
@@ -1699,14 +1699,9 @@ static int vi_common_sw_init(void *handle)
return 0;
}
-static int vi_common_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int vi_common_hw_init(void *handle)
+static int vi_common_hw_init(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* move the golden regs per IP block */
vi_init_golden_registers(adev);
@@ -1718,9 +1713,9 @@ static int vi_common_hw_init(void *handle)
return 0;
}
-static int vi_common_hw_fini(void *handle)
+static int vi_common_hw_fini(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
/* enable the doorbell aperture */
vi_enable_doorbell_aperture(adev, false);
@@ -1731,35 +1726,21 @@ static int vi_common_hw_fini(void *handle)
return 0;
}
-static int vi_common_suspend(void *handle)
+static int vi_common_suspend(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_fini(adev);
+ return vi_common_hw_fini(ip_block);
}
-static int vi_common_resume(void *handle)
+static int vi_common_resume(struct amdgpu_ip_block *ip_block)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- return vi_common_hw_init(adev);
+ return vi_common_hw_init(ip_block);
}
-static bool vi_common_is_idle(void *handle)
+static bool vi_common_is_idle(struct amdgpu_ip_block *ip_block)
{
return true;
}
-static int vi_common_wait_for_idle(void *handle)
-{
- return 0;
-}
-
-static int vi_common_soft_reset(void *handle)
-{
- return 0;
-}
-
static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
@@ -1964,10 +1945,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
return 0;
}
-static int vi_common_set_clockgating_state(void *handle,
+static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block,
enum amd_clockgating_state state)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
if (amdgpu_sriov_vf(adev))
return 0;
@@ -2007,15 +1988,15 @@ static int vi_common_set_clockgating_state(void *handle,
return 0;
}
-static int vi_common_set_powergating_state(void *handle,
+static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block,
enum amd_powergating_state state)
{
return 0;
}
-static void vi_common_get_clockgating_state(void *handle, u64 *flags)
+static void vi_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = ip_block->adev;
int data;
if (amdgpu_sriov_vf(adev))
@@ -2047,19 +2028,14 @@ static const struct amd_ip_funcs vi_common_ip_funcs = {
.early_init = vi_common_early_init,
.late_init = vi_common_late_init,
.sw_init = vi_common_sw_init,
- .sw_fini = vi_common_sw_fini,
.hw_init = vi_common_hw_init,
.hw_fini = vi_common_hw_fini,
.suspend = vi_common_suspend,
.resume = vi_common_resume,
.is_idle = vi_common_is_idle,
- .wait_for_idle = vi_common_wait_for_idle,
- .soft_reset = vi_common_soft_reset,
.set_clockgating_state = vi_common_set_clockgating_state,
.set_powergating_state = vi_common_set_powergating_state,
.get_clockgating_state = vi_common_get_clockgating_state,
- .dump_ip_state = NULL,
- .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version vi_common_ip_block =