summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-16 05:04:27 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-16 05:04:27 +0300
commitbe8454afc50f43016ca8b6130d9673bdd0bd56ec (patch)
tree897e49c1ccadeed9b083a3ffc13f0dd2d6d7d874 /drivers/gpu/drm/amd/amdgpu
parentfec88ab0af9706b2201e5daf377c5031c62d11f7 (diff)
parent3729fe2bc2a01f4cc1aa88be8f64af06084c87d6 (diff)
downloadlinux-be8454afc50f43016ca8b6130d9673bdd0bd56ec.tar.xz
Merge tag 'drm-next-2019-07-16' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "The biggest thing in this is the AMD Navi GPU support, this again contains a bunch of header files that are large. These are the new AMD RX5700 GPUs that just recently became available. New drivers: - ST-Ericsson MCDE driver - Ingenic JZ47xx SoC UAPI change: - HDR source metadata property Core: - HDR inforframes and EDID parsing - drm hdmi infoframe unpacking - remove prime sg_table caching into dma-buf - New gem vram helpers to reduce driver code - Lots of drmP.h removal - reservation fencing fix - documentation updates - drm_fb_helper_connector removed - mode name command handler rewrite fbcon: - Remove the fbcon notifiers ttm: - forward progress fixes dma-buf: - make mmap call optional - debugfs refcount fixes - dma-fence free with pending signals fix - each dma-buf gets an inode Panels: - Lots of additional panel bindings amdgpu: - initial navi10 support - avoid hw reset - HDR metadata support - new thermal sensors for vega asics - RAS fixes - use HMM rather than MMU notifier - xgmi topology via kfd - SR-IOV fixes - driver reload fixes - DC use a core bpc attribute - Aux fixes for DC - Bandwidth calc updates for DC - Clock handling refactor - kfd VEGAM support vmwgfx: - Coherent memory support changes i915: - HDR Support - HDMI i2c link - Icelake multi-segmented gamma support - GuC firmware update - Mule Creek Canyon PCH support for EHL - EHL platform updtes - move i915.alpha_support to i915.force_probe - runtime PM refactoring - VBT parsing refactoring - DSI fixes - struct mutex dependency reduction - GEM code reorg mali-dp: - Komeda driver features msm: - dsi vs EPROBE_DEFER fixes - msm8998 snapdragon 835 support - a540 gpu support - mdp5 and dpu interconnect support exynos: - drmP.h removal tegra: - misc fixes tda998x: - audio support improvements - pixel repeated mode support - quantisation range handling corrections - HDMI vendor info fix armada: - interlace support fix - overlay/video plane register handling refactor - add gamma support rockchip: - RX3328 support panfrost: - expose perf counters via hidden ioctls vkms: - enumerate CRC sources list ast: - rework BO handling mgag200: - rework BO handling dw-hdmi: - suspend/resume support rcar-du: - R8A774A1 Soc Support - LVDS dual-link mode support - Additional formats - Misc fixes omapdrm: - DSI command mode display support stm - fb modifier support - runtime PM support sun4i: - use vmap ops vc4: - binner bo binding rework v3d: - compute shader support - resync/sync fixes - job management refactoring lima: - NULL pointer in irq handler fix - scheduler default timeout virtio: - fence seqno support - trace events bochs: - misc fixes tc458767: - IRQ/HDP handling sii902x: - HDMI audio support atmel-hlcdc: - misc fixes meson: - zpos support" * tag 'drm-next-2019-07-16' of git://anongit.freedesktop.org/drm/drm: (1815 commits) Revert "Merge branch 'vmwgfx-next' of git://people.freedesktop.org/~thomash/linux into drm-next" Revert "mm: adjust apply_to_pfn_range interface for dropped token." mm: adjust apply_to_pfn_range interface for dropped token. drm/amdgpu/navi10: add uclk activity sensor drm/amdgpu: properly guard the generic discovery code drm/amdgpu: add missing documentation on new module parameters drm/amdgpu: don't invalidate caches in RELEASE_MEM, only do the writeback drm/amd/display: avoid 64-bit division drm/amdgpu/psp11: simplify the ucode register logic drm/amdgpu: properly guard DC support in navi code drm/amd/powerplay: vega20: fix uninitialized variable use drm/amd/display: dcn20: include linux/delay.h amdgpu: make pmu support optional drm/amd/powerplay: Zero initialize current_rpm in vega20_get_fan_speed_percent drm/amd/powerplay: Zero initialize freq in smu_v11_0_get_current_clk_freq drm/amd/powerplay: Use memset to initialize metrics structs drm/amdgpu/mes10.1: Fix header guard drm/amd/powerplay: add temperature sensor support for navi10 drm/amdgpu: fix scheduler timeout calc drm/amdgpu: Prepare for hmm_range_register API change (v2) ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h80
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c975
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c85
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c228
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c163
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c185
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c506
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c415
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c (renamed from drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c)133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c186
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c182
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h86
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c211
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c388
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c280
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h37
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c205
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c302
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h98
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h82
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_test.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c314
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c197
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h94
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c81
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c101
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_crtc.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_i2c.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h975
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v6_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_virtual.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c391
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c5216
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c523
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c353
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c918
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/iceland_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_smc.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.c366
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v10_1.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c444
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c486
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h4806
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c334
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c823
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.h33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nvd.h418
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h126
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v11_0.c121
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v3_1.c135
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c1687
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dpm.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_smc.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c110
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h68
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ta_ras_if.h108
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_ih.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v4_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c150
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c2261
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vega10_ih.c95
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c17
185 files changed, 27001 insertions, 1857 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 844f0a162981..f6e5c0282fc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -27,10 +27,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ depends on HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fdd0ca4b0f0b..56e084367b93 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -49,12 +49,14 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
- amdgpu_vm_sdma.o
+ amdgpu_vm_sdma.o amdgpu_discovery.o
+
+amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -64,7 +66,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
- vega20_reg_init.o nbio_v7_4.o
+ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o
# add DF block
amdgpu-y += \
@@ -75,7 +77,8 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
- gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
+ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
# add IH block
amdgpu-y += \
@@ -84,7 +87,8 @@ amdgpu-y += \
iceland_ih.o \
tonga_ih.o \
cz_ih.o \
- vega10_ih.o
+ vega10_ih.o \
+ navi10_ih.o
# add PSP block
amdgpu-y += \
@@ -108,14 +112,20 @@ amdgpu-y += \
amdgpu_gfx.o \
amdgpu_rlc.o \
gfx_v8_0.o \
- gfx_v9_0.o
+ gfx_v9_0.o \
+ gfx_v10_0.o
# add async DMA block
amdgpu-y += \
amdgpu_sdma.o \
sdma_v2_4.o \
sdma_v3_0.o \
- sdma_v4_0.o
+ sdma_v4_0.o \
+ sdma_v5_0.o
+
+# add MES block
+amdgpu-y += \
+ mes_v10_1.o
# add UVD block
amdgpu-y += \
@@ -133,7 +143,12 @@ amdgpu-y += \
# add VCN block
amdgpu-y += \
amdgpu_vcn.o \
- vcn_v1_0.o
+ vcn_v1_0.o \
+ vcn_v2_0.o
+
+# add ATHUB block
+amdgpu-y += \
+ athub_v2_0.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
@@ -146,7 +161,8 @@ amdgpu-y += \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
- amdgpu_amdkfd_gfx_v9.o
+ amdgpu_amdkfd_gfx_v9.o \
+ amdgpu_amdkfd_gfx_v10.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@@ -173,7 +189,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 14398f55f602..f88d8141447c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -44,9 +44,9 @@
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_execbuf_util.h>
-#include <drm/drmP.h>
-#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
#include <drm/gpu_scheduler.h>
#include <kgd_kfd_interface.h>
@@ -84,6 +84,8 @@
#include "amdgpu_doorbell.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_smu.h"
+#include "amdgpu_discovery.h"
+#include "amdgpu_mes.h"
#define MAX_GPU_INSTANCE 16
@@ -118,7 +120,6 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
-extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@@ -143,7 +144,6 @@ extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
-extern int amdgpu_vram_page_split;
extern int amdgpu_ngg;
extern int amdgpu_prim_buf_per_se;
extern int amdgpu_pos_buf_per_se;
@@ -156,9 +156,14 @@ extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern uint amdgpu_dc_feature_mask;
+extern uint amdgpu_dm_abm_level;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
+extern int amdgpu_async_gfx_ring;
+extern int amdgpu_mcbp;
+extern int amdgpu_discovery;
+extern int amdgpu_mes;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -211,9 +216,11 @@ struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
struct amdgpu_atif;
+struct kfd_vm_fault_info;
enum amdgpu_cp_irq {
- AMDGPU_CP_IRQ_GFX_EOP = 0,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP,
@@ -415,6 +422,7 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@@ -558,6 +566,8 @@ struct amdgpu_asic_funcs {
uint64_t *count1);
/* do we need to reset the asic at init time (e.g., kexec) */
bool (*need_reset_on_init)(struct amdgpu_device *adev);
+ /* PCIe replay counter */
+ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
};
/*
@@ -639,6 +649,11 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma1;
};
+struct amdgpu_mmio_remap {
+ u32 reg_offset;
+ resource_size_t bus_addr;
+};
+
struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@@ -651,6 +666,8 @@ struct amdgpu_nbio_funcs {
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
+ void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@@ -666,10 +683,11 @@ struct amdgpu_nbio_funcs {
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*detect_hw_virt)(struct amdgpu_device *adev);
+ void (*remap_hdp_registers)(struct amdgpu_device *adev);
};
struct amdgpu_df_funcs {
- void (*init)(struct amdgpu_device *adev);
+ void (*sw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
@@ -680,6 +698,12 @@ struct amdgpu_df_funcs {
u32 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
+ int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+ int is_enable);
+ int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+ int is_disable);
+ void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+ uint64_t *count);
};
/* Define the HW IP blocks will be used in driver , add more if necessary */
enum amd_hw_ip_block_type {
@@ -714,6 +738,7 @@ struct amd_powerplay {
};
#define AMDGPU_RESET_MAGIC_NUM 64
+#define AMDGPU_MAX_DF_PERFMONS 4
struct amdgpu_device {
struct device *dev;
struct drm_device *ddev;
@@ -740,6 +765,7 @@ struct amdgpu_device {
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
unsigned debugfs_count;
#if defined(CONFIG_DEBUG_FS)
+ struct dentry *debugfs_preempt;
struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
#endif
struct amdgpu_atif *atif;
@@ -749,6 +775,7 @@ struct amdgpu_device {
struct mutex grbm_idx_mutex;
struct dev_pm_domain vga_pm_domain;
bool have_disp_power_ref;
+ bool have_atomics_support;
/* BIOS */
bool is_atom_fw;
@@ -764,6 +791,7 @@ struct amdgpu_device {
void __iomem *rmmio;
/* protects concurrent MM_INDEX/DATA based register access */
spinlock_t mmio_idx_lock;
+ struct amdgpu_mmio_remap rmmio_remap;
/* protects concurrent SMC based register access */
spinlock_t smc_idx_lock;
amdgpu_rreg_t smc_rreg;
@@ -889,6 +917,13 @@ struct amdgpu_device {
/* display related functionality */
struct amdgpu_display_manager dm;
+ /* discovery */
+ uint8_t *discovery;
+
+ /* mes */
+ bool enable_mes;
+ struct amdgpu_mes mes;
+
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
int num_ip_blocks;
struct mutex mn_lock;
@@ -906,7 +941,7 @@ struct amdgpu_device {
const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
- struct delayed_work late_init_work;
+ struct delayed_work delayed_init_work;
struct amdgpu_virt virt;
/* firmware VRAM reservation */
@@ -936,6 +971,14 @@ struct amdgpu_device {
struct work_struct xgmi_reset_work;
bool in_baco_reset;
+
+ long gfx_timeout;
+ long sdma_timeout;
+ long video_timeout;
+ long compute_timeout;
+
+ uint64_t unique_id;
+ uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1065,6 +1108,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
+#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1081,6 +1125,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
+
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
void amdgpu_register_atpx_handler(void);
@@ -1170,5 +1217,24 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
#include "amdgpu_object.h"
+
+/* used by df_v3_6.c and amdgpu_pmu.c */
+#define AMDGPU_PMU_ATTR(_name, _object) \
+static ssize_t \
+_name##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \
+ return sprintf(page, _object "\n"); \
+} \
+ \
+static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name)
+
#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 0a4fba196b84..eba42c752bca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -24,6 +24,7 @@
*/
#include <linux/irqdomain.h>
+#include <linux/pci.h>
#include <linux/pm_domain.h>
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 56f8ca2a3bb4..1e41367ef74e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -27,7 +27,7 @@
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
#include <acpi/video.h>
-#include <drm/drmP.h>
+
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index 3889486f71fe..a4d65973bf7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -25,7 +25,7 @@
*/
#include <linux/hdmi.h>
#include <linux/gcd.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index aeead072fa79..9fa4f25a3745 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -22,11 +22,13 @@
#include "amdgpu_amdkfd.h"
#include "amd_shared.h"
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -76,6 +78,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
@@ -84,6 +87,9 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
+ case CHIP_NAVI10:
+ kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
+ break;
default:
dev_info(adev->dev, "kfd not supported on this ASIC\n");
return;
@@ -148,21 +154,23 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
};
/* this is going to have a few of the MSBs set that we need to
- * clear */
+ * clear
+ */
bitmap_complement(gpu_resources.queue_bitmap,
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.sched.ready)
- clear_bit(amdgpu_gfx_queue_to_bit(adev,
+ clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
adev->gfx.kiq.ring.queue),
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
- * nbits is not compile time constant */
+ * nbits is not compile time constant
+ */
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
@@ -335,6 +343,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+ void **mem_obj)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct amdgpu_bo *bo = NULL;
+ struct amdgpu_bo_param bp;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = size;
+ bp.byte_align = 1;
+ bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+ bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ bp.type = ttm_bo_type_device;
+ bp.resv = NULL;
+
+ r = amdgpu_bo_create(adev, &bp, &bo);
+ if (r) {
+ dev_err(adev->dev,
+ "failed to allocate gws BO for amdkfd (%d)\n", r);
+ return r;
+ }
+
+ *mem_obj = bo;
+ return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+{
+ struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+ amdgpu_bo_unref(&bo);
+}
+
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type)
{
@@ -398,9 +440,12 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
if (amdgpu_sriov_vf(adev))
mem_info->mem_clk_max = adev->clock.default_mclk / 100;
- else if (adev->powerplay.pp_funcs)
- mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
- else
+ else if (adev->powerplay.pp_funcs) {
+ if (amdgpu_emu_mode == 1)
+ mem_info->mem_clk_max = 0;
+ else
+ mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
+ } else
mem_info->mem_clk_max = 100;
}
@@ -518,6 +563,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
return adev->gmc.xgmi.hive_id;
}
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+ struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+ struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+ int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+ if (ret < 0) {
+ DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+ adev->gmc.xgmi.physical_node_id,
+ peer_adev->gmc.xgmi.physical_node_id, ret);
+ ret = 0;
+ }
+ return (uint8_t)ret;
+}
+
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->rmmio_remap.bus_addr;
+}
+
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->gds.gws_size;
+}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
@@ -595,6 +668,13 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
+bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->have_atomics_support;
+}
+
#ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
@@ -635,6 +715,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
return NULL;
}
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
+{
+ return NULL;
+}
+
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
const struct kfd2kgd_calls *f2g)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4e37fa7e85b1..b6076d19e442 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
atomic_t invalid;
struct amdkfd_process_info *process_info;
- struct page **user_pages;
struct amdgpu_sync sync;
@@ -136,10 +135,12 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
+bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -154,6 +155,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@@ -169,6 +174,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
new file mode 100644
index 000000000000..0723f800e815
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#undef pr_fmt
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <linux/mmu_context.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v10_structs.h"
+#include "nv.h"
+#include "nvd.h"
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES,
+ SAVE_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout);
+#if 0
+static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
+#endif
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint64_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+#if 0
+/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but
+ * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu
+ * changes commented out related code, doing the same here for now but
+ * need to sync with Ken et al
+ */
+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+#endif
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id) & 31;
+
+ return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+#if 0
+ /* TODO: uncomment this code when the hardware support is ready. */
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ pr_debug("ATHUB mapping update finished\n");
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+#endif
+
+ /* Mapping vmid to pasid also for IH block */
+ pr_debug("update mapping for IH block and mmhub");
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ /* On gfx10, mmSDMA1_xxx registers are defined NOT based
+ * on SDMA1 base address (dw 0x1860) but based on SDMA0
+ * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
+ * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
+ * below
+ */
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
+ };
+ uint32_t retval;
+
+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+ mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
+
+#if 0
+static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
+{
+ uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
+ mmTCP_WATCH0_ADDR_H;
+
+ pr_debug("kfd: reg watch base address: 0x%x\n", retval);
+
+ return retval;
+}
+#endif
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+ return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v10_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_compute_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+ pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ data = RREG32(sdmax_gfx_context_cntl);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(sdmax_gfx_context_cntl, data);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
+ pr_debug("sdma base addr %x\n", sdma_base_addr);
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v10_compute_mqd *m = get_mqd(mqd);
+
+#if 0
+ unsigned long flags;
+ int retry;
+#endif
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+#if 0 /* Is this still needed? */
+ /* Workaround: If IQ timer is active and the wait time is close to or
+ * equal to 0, dequeueing is not safe. Wait until either the wait time
+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+ * cleared before continuing. Also, ensure wait times are set to at
+ * least 0x3.
+ */
+ local_irq_save(flags);
+ preempt_disable();
+ retry = 5000; /* wait for 500 usecs at maximum */
+ while (true) {
+ temp = RREG32(mmCP_HQD_IQ_TIMER);
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+ pr_debug("HW is processing IQ\n");
+ goto loop;
+ }
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+ == 3) /* SEM-rearm is safe */
+ break;
+ /* Wait time 3 is safe for CP, but our MMIO read/write
+ * time is close to 1 microsecond, so check for 10 to
+ * leave more buffer room
+ */
+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+ >= 10)
+ break;
+ pr_debug("IQ timer is active\n");
+ } else
+ break;
+loop:
+ if (!retry) {
+ pr_err("CP HQD IQ timer status time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ retry = 1000;
+ while (true) {
+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+ break;
+ pr_debug("Dequeue request is pending\n");
+
+ if (!retry) {
+ pr_err("CP HQD dequeue request time out\n");
+ break;
+ }
+ ndelay(100);
+ --retry;
+ }
+ local_irq_restore(flags);
+ preempt_enable();
+#endif
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v10_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ uint32_t req = (1 << vmid) |
+ (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */
+ GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK |
+ GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK |
+ GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK |
+ GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK |
+ GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ /* Use light weight invalidation.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate only GCHUB as
+ * SDMA is now moved to GCHUB
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req);
+
+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid));
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ if (amdgpu_emu_mode == 0 && ring->sched.ready)
+ return invalidate_tlbs_with_kiq(adev, pasid);
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+ == pasid) {
+ write_vmid_invalidate_request(kgd, vmid);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return 0;
+ }
+
+ write_vmid_invalidate_request(kgd, vmid);
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SA_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint64_t base = page_table_base | AMDGPU_PTE_VALID;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* TODO: take advantage of per-process address space size. For
+ * now, all processes share the same address space size, like
+ * on GFX8 and older.
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index fa09e11a600c..5f459bf5f622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -23,7 +23,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
@@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
- pr_debug("kfd: sdma base address: 0x%x\n", retval);
+ pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index fec3a6aa1de6..6d2f61449606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -24,7 +24,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
@@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
- pr_debug("kfd: sdma base address: 0x%x\n", retval);
+ pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ef3d93b995b2..85395f2d83a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -26,7 +26,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "soc15_hw_ip.h"
@@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
lock_srbm(kgd, 0, 0, 0, vmid);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
@@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
- WREG32(reg, mqd_hqd[reg - hqd_base]);
+ WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
@@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
@@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
- WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+ WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
- WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- /* Use legacy mode tlb invalidation.
- *
- * Currently on Raven the code below is broken for anything but
- * legacy mode due to a MMHUB power gating problem. A workaround
- * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
- * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
- * bit.
- *
- * TODO 1: agree on the right set of invalidation registers for
- * KFD use. Use the last one for now. Invalidate both GC and
- * MMHUB.
- *
- * TODO 2: support range-based invalidation, requires kfg2kgd
- * interface change
- */
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
+ uint32_t flush_type)
{
signed long r;
uint32_t seq;
@@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
- PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
@@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ uint32_t flush_type = 0;
if (adev->in_gpu_reset)
return -EIO;
+ if (adev->gmc.xgmi.num_physical_nodes &&
+ adev->asic_type == CHIP_VEGA20)
+ flush_type = 2;
if (ring->sched.ready)
- return invalidate_tlbs_with_kiq(adev, pasid);
+ return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
- write_vmid_invalidate_request(kgd, vmid);
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+ flush_type);
break;
}
}
@@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
return 0;
}
- write_vmid_invalidate_request(kgd, vmid);
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to a MMHUB power gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
return 0;
}
@@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
mutex_lock(&adev->grbm_idx_mutex);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a6e5184d436c..1d3ee9c42f7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -22,14 +22,16 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
+#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
-#include <linux/dma-buf.h>
-#include <drm/drmP.h>
+#include <linux/sched/task.h>
+
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
/* Special VM and GART address alignment needed for VI pre-Fiji due to
* a HW bug.
@@ -456,6 +458,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
mutex_unlock(&process_info->lock);
}
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ struct ttm_validate_buffer *bo_list_entry;
+
+ bo_list_entry = &mem->validate_list;
+ mutex_lock(&process_info->lock);
+ list_del(&bo_list_entry->head);
+ mutex_unlock(&process_info->lock);
+}
+
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
@@ -491,28 +504,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
- /* If no restore worker is running concurrently, user_pages
- * should not be allocated
- */
- WARN(mem->user_pages, "Leaking user_pages array");
-
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n", __func__);
- ret = -ENOMEM;
- goto unregister_out;
- }
-
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
- goto free_out;
+ goto unregister_out;
}
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -525,11 +522,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
- if (ret)
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -588,13 +581,12 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
+ false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else {
@@ -652,7 +644,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@@ -668,7 +659,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
- false, &ctx->duplicates);
+ false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else
@@ -822,7 +813,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
ret = amdgpu_sync_resv(NULL,
sync, pd->tbo.resv,
- AMDGPU_FENCE_OWNER_UNDEFINED, false);
+ AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
return ret;
}
@@ -896,6 +887,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
+ ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
&vm->process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -909,6 +903,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return 0;
+reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1109,7 +1104,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
- } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+ } else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
+ ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
@@ -1199,12 +1195,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
- if (ret) {
- mutex_lock(&avm->process_info->lock);
- list_del(&(*mem)->validate_list.head);
- mutex_unlock(&avm->process_info->lock);
+ if (ret)
goto allocate_init_user_pages_failed;
- }
}
if (offset)
@@ -1213,6 +1205,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
+ remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@@ -1262,15 +1255,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
- /* Free user pages if necessary */
- if (mem->user_pages) {
- pr_debug("%s: Freeing user_pages array\n", __func__);
- if (mem->user_pages[0])
- release_pages(mem->user_pages,
- mem->bo->tbo.ttm->num_pages);
- kvfree(mem->user_pages);
- }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1294,8 +1278,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
- /* If the SG is not NULL, it's one we created for a doorbell
- * BO. We need to free it.
+ /* If the SG is not NULL, it's one we created for a doorbell or mmio
+ * remap BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
@@ -1409,7 +1393,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
- pr_err("Failed to map radeon bo to gpuvm\n");
+ pr_err("Failed to map bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
@@ -1744,36 +1728,20 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- if (!mem->user_pages) {
- mem->user_pages =
- kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n",
- __func__);
- return -ENOMEM;
- }
- } else if (mem->user_pages[0]) {
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
- }
-
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
- mem->user_pages[0] = NULL;
- pr_info("%s: Failed to get user pages: %d\n",
+ pr_debug("%s: Failed to get user pages: %d\n",
__func__, ret);
- /* Pretend it succeeded. It will fail later
- * with a VM fault if the GPU tries to access
- * it. Better than hanging indefinitely with
- * stalled user mode queues.
- */
+
+ /* Return error -EBUSY or -ENOMEM, retry restore */
+ return ret;
}
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
/* Mark the BO as valid unless it was invalidated
- * again concurrently
+ * again concurrently.
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
@@ -1806,7 +1774,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@@ -1827,10 +1796,11 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
/* Reserve all BOs and page tables for validation */
- ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
+ true);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
- goto out;
+ goto out_free;
amdgpu_sync_create(&sync);
@@ -1846,10 +1816,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
- /* Copy pages array and validate the BO if we got user pages */
- if (mem->user_pages[0]) {
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@@ -1858,13 +1826,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- /* Validate succeeded, now the BO owns the pages, free
- * our copy of the pointer array. Put this BO back on
- * the userptr_valid_list. If we need to revalidate
- * it, we need to start from scratch.
- */
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
@@ -1897,8 +1858,9 @@ unreserve_out:
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out:
+out_free:
kfree(pd_bo_list_entries);
+out_no_mem:
return ret;
}
@@ -1963,6 +1925,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* hanging. No point trying again.
*/
}
+
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
@@ -2032,7 +1995,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
- false, &duplicate_save);
+ false, &duplicate_save, true);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
@@ -2130,3 +2093,92 @@ ttm_reserve_fail:
kfree(pd_bo_list);
return ret;
}
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+ int ret;
+
+ if (!info || !gws)
+ return -EINVAL;
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ mutex_init(&(*mem)->lock);
+ (*mem)->bo = amdgpu_bo_ref(gws_bo);
+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+ (*mem)->process_info = process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+
+ /* Validate gws bo the first time it is added to process */
+ mutex_lock(&(*mem)->process_info->lock);
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ goto bo_reservation_failure;
+ }
+
+ ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+ if (ret) {
+ pr_err("GWS BO validate failed %d\n", ret);
+ goto bo_validation_failure;
+ }
+ /* GWS resource is shared b/t amdgpu and amdkfd
+ * Add process eviction fence to bo so they can
+ * evict each other.
+ */
+ ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1);
+ if (ret)
+ goto reserve_shared_fail;
+ amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+ amdgpu_bo_unreserve(gws_bo);
+ mutex_unlock(&(*mem)->process_info->lock);
+
+ return ret;
+
+reserve_shared_fail:
+bo_validation_failure:
+ amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+ mutex_unlock(&(*mem)->process_info->lock);
+ amdgpu_sync_free(&(*mem)->sync);
+ remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ *mem = NULL;
+ return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+ int ret;
+ struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+ struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+ struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+ /* Remove BO from process's validate list so restore worker won't touch
+ * it anymore
+ */
+ remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+ ret = amdgpu_bo_reserve(gws_bo, false);
+ if (unlikely(ret)) {
+ pr_err("Reserve gws bo failed %d\n", ret);
+ //TODO add BO back to validate_list?
+ return ret;
+ }
+ amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+ process_info->eviction_fence);
+ amdgpu_bo_unreserve(gws_bo);
+ amdgpu_sync_free(&kgd_mem->sync);
+ amdgpu_bo_unref(&gws_bo);
+ mutex_destroy(&kgd_mem->lock);
+ kfree(mem);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index e02781b37e73..1c9d40f97a9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f96d75c6e099..daf687428cdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atomfirmware.h"
@@ -118,6 +118,7 @@ union umc_info {
union vram_info {
struct atom_vram_info_header_v2_3 v23;
+ struct atom_vram_info_header_v2_4 v24;
};
/*
* Return vram width from integrated system info table, if available,
@@ -126,22 +127,50 @@ union vram_info {
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
- int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- integratedsysteminfo);
+ int index;
u16 data_offset, size;
union igp_info *igp_info;
+ union vram_info *vram_info;
+ u32 mem_channel_number;
+ u32 mem_channel_width;
u8 frev, crev;
+ if (adev->flags & AMD_IS_APU)
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ integratedsysteminfo);
+ else
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ vram_info);
+
/* get any igp specific overrides */
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
&frev, &crev, &data_offset)) {
- igp_info = (union igp_info *)
- (mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 11:
- return igp_info->v11.umachannelnumber * 64;
- default:
- return 0;
+ if (adev->flags & AMD_IS_APU) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 11:
+ mem_channel_number = igp_info->v11.umachannelnumber;
+ /* channel width is 64 */
+ return mem_channel_number * 64;
+ default:
+ return 0;
+ }
+ } else {
+ vram_info = (union vram_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 3:
+ mem_channel_number = vram_info->v23.vram_module[0].channel_num;
+ mem_channel_width = vram_info->v23.vram_module[0].channel_width;
+ return mem_channel_number * (1 << mem_channel_width);
+ case 4:
+ mem_channel_number = vram_info->v24.vram_module[0].channel_num;
+ mem_channel_width = vram_info->v24.vram_module[0].channel_width;
+ return mem_channel_number * (1 << mem_channel_width);
+ default:
+ return 0;
+ }
}
}
@@ -179,6 +208,9 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
+ case ATOM_DGPU_VRAM_TYPE_GDDR6:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR6;
+ break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@@ -227,6 +259,9 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
case 3:
mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ case 4:
+ mem_type = vram_info->v24.vram_module[0].memory_type;
+ return convert_atom_mem_type_to_vram_type(adev, mem_type);
default:
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 3079ea8523c5..649e68c4479b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -21,7 +21,7 @@
*
* Authors: Jerome Glisse
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index a5df80d50d44..50dff69a0f6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -25,10 +25,11 @@
* Alex Deucher
* Jerome Glisse
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "atom.h"
+#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8e1150..7bcf86c61999 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,7 +28,8 @@
* Christian König <deathsimple@vodafone.de>
*/
-#include <drm/drmP.h>
+#include <linux/uaccess.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -81,9 +82,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return -ENOMEM;
kref_init(&list->refcount);
- list->gds_obj = adev->gds.gds_gfx_bo;
- list->gws_obj = adev->gds.gws_gfx_bo;
- list->oa_obj = adev->gds.oa_gfx_bo;
+ list->gds_obj = NULL;
+ list->gws_obj = NULL;
+ list->oa_obj = NULL;
array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
- int user_invalidated;
+ bool user_invalidated;
};
struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 387f1cf1dc20..031b094607bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -22,8 +22,9 @@
*
*/
#include <linux/list.h>
+#include <linux/pci.h>
#include <linux/slab.h>
-#include <drm/drmP.h>
+
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index bf04c12bd324..73b2ede773d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
#include <drm/drm_edid.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2f6239b6be6f..e069de8b54e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,9 +24,11 @@
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
+
+#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
@@ -52,7 +54,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &bo->tbo;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
- p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@@ -542,14 +543,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
- /* Check if we have user pages and nobody bound the BO already */
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- lobj->user_pages) {
+ if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+ lobj->user_invalidated && lobj->user_pages) {
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
+
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
binding_userptr = true;
@@ -580,7 +581,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
- unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -616,79 +616,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
- while (1) {
- struct list_head need_pages;
-
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto error_free_pages;
- }
-
- INIT_LIST_HEAD(&need_pages);
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
- &e->user_invalidated) && e->user_pages) {
-
- /* We acquired a page array, but somebody
- * invalidated it. Free it and try again
- */
- release_pages(e->user_pages,
- bo->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- e->user_pages = NULL;
- }
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- !e->user_pages) {
- list_del(&e->tv.head);
- list_add(&e->tv.head, &need_pages);
-
- amdgpu_bo_unreserve(bo);
- }
+ /* Get userptr backing pages. If pages are updated after registered
+ * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+ * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+ */
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ bool userpage_invalidated = false;
+ int i;
+
+ e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!e->user_pages) {
+ DRM_ERROR("calloc failure\n");
+ return -ENOMEM;
}
- if (list_empty(&need_pages))
- break;
-
- /* Unreserve everything again. */
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
- /* We tried too many times, just abort */
- if (!--tries) {
- r = -EDEADLK;
- DRM_ERROR("deadlock in %s\n", __func__);
- goto error_free_pages;
+ r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+ if (r) {
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ return r;
}
- /* Fill the page arrays for all userptrs. */
- list_for_each_entry(e, &need_pages, tv.head) {
- struct ttm_tt *ttm = e->tv.bo->ttm;
-
- e->user_pages = kvmalloc_array(ttm->num_pages,
- sizeof(struct page*),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- r = -ENOMEM;
- DRM_ERROR("calloc failure in %s\n", __func__);
- goto error_free_pages;
- }
-
- r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
- if (r) {
- DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
- kvfree(e->user_pages);
- e->user_pages = NULL;
- goto error_free_pages;
+ for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+ if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ userpage_invalidated = true;
+ break;
}
}
+ e->user_invalidated = userpage_invalidated;
+ }
- /* And try again. */
- list_splice(&need_pages, &p->validated);
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+ &duplicates, false);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+ goto out;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -707,16 +673,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
r = amdgpu_cs_list_validate(p, &duplicates);
- if (r) {
- DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
+ if (r)
goto error_validate;
- }
r = amdgpu_cs_list_validate(p, &p->validated);
- if (r) {
- DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
+ if (r)
goto error_validate;
- }
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
@@ -757,17 +719,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- if (!e->user_pages)
- continue;
-
- release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
- kvfree(e->user_pages);
- }
-
+out:
return r;
}
@@ -922,7 +874,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
struct dma_fence *f;
bo_va = fpriv->csa_va;
@@ -1011,7 +963,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
ce_preempt++;
@@ -1054,11 +1007,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
j++;
}
- /* UVD & VCE fw doesn't support user fences */
+ /* MM engine doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && (
- ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ if (parser->job->uf_addr && ring->funcs->no_user_fence)
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@@ -1328,7 +1279,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
-
int r;
job = p->job;
@@ -1338,15 +1288,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock */
+ /* No memory allocation is allowed while holding the mn lock.
+ * p->mn is hold until amdgpu_cs_submit is finished and fence is added
+ * to BOs.
+ */
amdgpu_mn_lock(p->mn);
+
+ /* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+ * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+ */
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- r = -ERESTARTSYS;
- goto error_abort;
- }
+ r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ }
+ if (r) {
+ r = -EAGAIN;
+ goto error_abort;
}
job->owner = p->filp;
@@ -1424,7 +1382,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r) {
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
- else if (r != -ERESTARTSYS)
+ else if (r != -ERESTARTSYS && r != -EAGAIN)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
goto out;
}
@@ -1442,6 +1400,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 54dd02a898b9..35a8d3c96fc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -47,6 +47,7 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
return -ENOMEM;
memset(ptr, 0, size);
+ adev->virt.csa_cpu_addr = ptr;
return 0;
}
@@ -79,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a28a3d722ba2..f539a2a92774 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -22,7 +22,6 @@
* Authors: monk liu <monk.liu@amd.com>
*/
-#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 8930d66f2204..20ce158490db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -24,8 +24,11 @@
*/
#include <linux/kthread.h>
-#include <drm/drmP.h>
-#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_debugfs.h>
+
#include "amdgpu.h"
/**
@@ -920,17 +923,195 @@ static const struct drm_info_list amdgpu_debugfs_list[] = {
{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
};
+static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
+ struct dma_fence **fences)
+{
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t sync_seq, last_seq;
+
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ sync_seq = ring->fence_drv.sync_seq;
+
+ last_seq &= drv->num_fences_mask;
+ sync_seq &= drv->num_fences_mask;
+
+ do {
+ struct dma_fence *fence, **ptr;
+
+ ++last_seq;
+ last_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[last_seq];
+
+ fence = rcu_dereference_protected(*ptr, 1);
+ RCU_INIT_POINTER(*ptr, NULL);
+
+ if (!fence)
+ continue;
+
+ fences[last_seq] = fence;
+
+ } while (last_seq != sync_seq);
+}
+
+static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
+ int length)
+{
+ int i;
+ struct dma_fence *fence;
+
+ for (i = 0; i < length; i++) {
+ fence = fences[i];
+ if (!fence)
+ continue;
+ dma_fence_signal(fence);
+ dma_fence_put(fence);
+ }
+}
+
+static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
+{
+ struct drm_sched_job *s_job;
+ struct dma_fence *fence;
+
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+ fence = sched->ops->run_job(s_job);
+ dma_fence_put(fence);
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
+{
+ struct amdgpu_job *job;
+ struct drm_sched_job *s_job;
+ uint32_t preempt_seq;
+ struct dma_fence *fence, **ptr;
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
+ return;
+
+ preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
+ if (preempt_seq <= atomic_read(&drv->last_seq))
+ return;
+
+ preempt_seq &= drv->num_fences_mask;
+ ptr = &drv->fences[preempt_seq];
+ fence = rcu_dereference_protected(*ptr, 1);
+
+ spin_lock(&sched->job_list_lock);
+ list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+ job = to_amdgpu_job(s_job);
+ if (job->fence == fence)
+ /* mark the job as preempted */
+ job->preemption_status |= AMDGPU_IB_PREEMPTED;
+ }
+ spin_unlock(&sched->job_list_lock);
+}
+
+static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
+{
+ int r, resched, length;
+ struct amdgpu_ring *ring;
+ struct dma_fence **fences = NULL;
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+
+ if (val >= AMDGPU_MAX_RINGS)
+ return -EINVAL;
+
+ ring = adev->rings[val];
+
+ if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+ return -EINVAL;
+
+ /* the last preemption failed */
+ if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
+ return -EBUSY;
+
+ length = ring->fence_drv.num_fences_mask + 1;
+ fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
+ if (!fences)
+ return -ENOMEM;
+
+ /* stop the scheduler */
+ kthread_park(ring->sched.thread);
+
+ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
+ /* preempt the IB */
+ r = amdgpu_ring_preempt_ib(ring);
+ if (r) {
+ DRM_WARN("failed to preempt ring %d\n", ring->idx);
+ goto failure;
+ }
+
+ amdgpu_fence_process(ring);
+
+ if (atomic_read(&ring->fence_drv.last_seq) !=
+ ring->fence_drv.sync_seq) {
+ DRM_INFO("ring %d was preempted\n", ring->idx);
+
+ amdgpu_ib_preempt_mark_partial_job(ring);
+
+ /* swap out the old fences */
+ amdgpu_ib_preempt_fences_swap(ring, fences);
+
+ amdgpu_fence_driver_force_completion(ring);
+
+ /* resubmit unfinished jobs */
+ amdgpu_ib_preempt_job_recovery(&ring->sched);
+
+ /* wait for jobs finished */
+ amdgpu_fence_wait_empty(ring);
+
+ /* signal the old fences */
+ amdgpu_ib_preempt_signal_fences(fences, length);
+ }
+
+failure:
+ /* restart the scheduler */
+ kthread_unpark(ring->sched.thread);
+
+ ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
+
+ if (fences)
+ kfree(fences);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
+ amdgpu_debugfs_ib_preempt, "%llu\n");
+
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
+ adev->debugfs_preempt =
+ debugfs_create_file("amdgpu_preempt_ib", 0600,
+ adev->ddev->primary->debugfs_root,
+ (void *)adev, &fops_ib_preempt);
+ if (!(adev->debugfs_preempt)) {
+ DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
+ return -EIO;
+ }
+
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
ARRAY_SIZE(amdgpu_debugfs_list));
}
+void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
+{
+ if (adev->debugfs_preempt)
+ debugfs_remove(adev->debugfs_preempt);
+}
+
#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
return 0;
}
+void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { }
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 8260d8073c26..f289d28ad6b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -34,6 +34,7 @@ struct amdgpu_debugfs {
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
+void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev);
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
const struct drm_info_list *files,
unsigned nfiles);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f4ac632a87b2..7401bc95c15b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -27,9 +27,10 @@
*/
#include <linux/power_supply.h>
#include <linux/kthread.h>
+#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
-#include <drm/drmP.h>
+
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
@@ -51,6 +52,7 @@
#endif
#include "vi.h"
#include "soc15.h"
+#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
@@ -61,12 +63,14 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
+#include "amdgpu_pmu.h"
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@@ -94,9 +98,32 @@ static const char *amdgpu_asic_name[] = {
"VEGA12",
"VEGA20",
"RAVEN",
+ "NAVI10",
"LAST",
};
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs)
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+ amdgpu_device_get_pcie_replay_count, NULL);
+
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
@@ -484,7 +511,10 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
} else {
tmp = RREG32(reg);
tmp &= ~and_mask;
- tmp |= or_mask;
+ if (adev->family >= AMDGPU_FAMILY_AI)
+ tmp |= (or_mask & and_mask);
+ else
+ tmp |= or_mask;
}
WREG32(reg, tmp);
}
@@ -910,8 +940,10 @@ def_value:
* Validates certain module parameters and updates
* the associated values used by the driver (all asics).
*/
-static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
+ int ret = 0;
+
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@@ -949,19 +981,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_device_check_block_size(adev);
- if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
- !is_power_of_2(amdgpu_vram_page_split))) {
- dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
- amdgpu_vram_page_split);
- amdgpu_vram_page_split = 1024;
- }
-
- if (amdgpu_lockup_timeout == 0) {
- dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
- amdgpu_lockup_timeout = 10000;
+ ret = amdgpu_device_get_job_timeout_settings(adev);
+ if (ret) {
+ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+ return ret;
}
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+ return ret;
}
/**
@@ -1356,6 +1384,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
else
chip_name = "raven";
break;
+ case CHIP_NAVI10:
+ chip_name = "navi10";
+ break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@@ -1402,6 +1433,23 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->gfx.cu_info.max_scratch_slots_per_cu =
le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
+ if (hdr->version_minor >= 1) {
+ const struct gpu_info_firmware_v1_1 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->gfx.config.num_sc_per_sh =
+ le32_to_cpu(gpu_info_fw->num_sc_per_sh);
+ adev->gfx.config.num_packer_per_sc =
+ le32_to_cpu(gpu_info_fw->num_packer_per_sc);
+ }
+#ifdef CONFIG_DRM_AMD_DC_DCN2_0
+ if (hdr->version_minor == 2) {
+ const struct gpu_info_firmware_v1_2 *gpu_info_fw =
+ (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
+ }
+#endif
break;
}
default:
@@ -1490,6 +1538,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
break;
+ case CHIP_NAVI10:
+ adev->family = AMDGPU_FAMILY_NV;
+
+ r = nv_set_ip_blocks(adev);
+ if (r)
+ return r;
+ break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@@ -1505,6 +1560,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
+
+ /* query the reg access mode at the very beginning */
+ amdgpu_virt_init_reg_access_mode(adev);
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
@@ -1532,6 +1590,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.valid = true;
}
}
+ /* get the vbios after the asic_funcs are set up */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+ /* Read BIOS */
+ if (!amdgpu_get_bios(adev))
+ return -EINVAL;
+
+ r = amdgpu_atombios_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+ return r;
+ }
+ }
}
adev->cg_flags &= amdgpu_cg_mask;
@@ -1550,6 +1621,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
@@ -1670,7 +1742,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = true;
/* right after GMC hw init, we create CSA */
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_CSA_SIZE);
@@ -1821,6 +1893,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
return 0;
}
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+ struct amdgpu_gpu_instance *gpu_ins;
+ struct amdgpu_device *adev;
+ int i, ret = 0;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * MGPU fan boost feature should be enabled
+ * only when there are two or more dGPUs in
+ * the system
+ */
+ if (mgpu_info.num_dgpu < 2)
+ goto out;
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ gpu_ins = &(mgpu_info.gpu_ins[i]);
+ adev = gpu_ins->adev;
+ if (!(adev->flags & AMD_IS_APU) &&
+ !gpu_ins->mgpu_fan_enabled &&
+ adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
+ ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+ if (ret)
+ break;
+
+ gpu_ins->mgpu_fan_enabled = 1;
+ }
+ }
+
+out:
+ mutex_unlock(&mgpu_info.mutex);
+
+ return ret;
+}
+
/**
* amdgpu_device_ip_late_init - run late init for hardware IPs
*
@@ -1854,11 +1963,15 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
- queue_delayed_work(system_wq, &adev->late_init_work,
- msecs_to_jiffies(AMDGPU_RESUME_MS));
-
amdgpu_device_fill_reset_magic(adev);
+ r = amdgpu_device_enable_mgpu_fan_boost();
+ if (r)
+ DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ /* set to low pstate by default */
+ amdgpu_xgmi_set_pstate(adev, 0);
+
return 0;
}
@@ -1957,65 +2070,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
-static int amdgpu_device_enable_mgpu_fan_boost(void)
-{
- struct amdgpu_gpu_instance *gpu_ins;
- struct amdgpu_device *adev;
- int i, ret = 0;
-
- mutex_lock(&mgpu_info.mutex);
-
- /*
- * MGPU fan boost feature should be enabled
- * only when there are two or more dGPUs in
- * the system
- */
- if (mgpu_info.num_dgpu < 2)
- goto out;
-
- for (i = 0; i < mgpu_info.num_dgpu; i++) {
- gpu_ins = &(mgpu_info.gpu_ins[i]);
- adev = gpu_ins->adev;
- if (!(adev->flags & AMD_IS_APU) &&
- !gpu_ins->mgpu_fan_enabled &&
- adev->powerplay.pp_funcs &&
- adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
- ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
- if (ret)
- break;
-
- gpu_ins->mgpu_fan_enabled = 1;
- }
- }
-
-out:
- mutex_unlock(&mgpu_info.mutex);
-
- return ret;
-}
-
/**
- * amdgpu_device_ip_late_init_func_handler - work handler for ib test
+ * amdgpu_device_delayed_init_work_handler - work handler for IB tests
*
* @work: work_struct.
*/
-static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
+static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
- container_of(work, struct amdgpu_device, late_init_work.work);
+ container_of(work, struct amdgpu_device, delayed_init_work.work);
int r;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
-
- r = amdgpu_device_enable_mgpu_fan_boost();
- if (r)
- DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
-
- /*set to low pstate by default */
- amdgpu_xgmi_set_pstate(adev, 0);
-
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2356,6 +2424,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case CHIP_RAVEN:
#endif
+#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
+ case CHIP_NAVI10:
+#endif
return amdgpu_dc != 0;
#endif
default:
@@ -2467,7 +2538,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->lock_reset);
mutex_init(&adev->virt.dpm_mutex);
- amdgpu_device_check_arguments(adev);
+ r = amdgpu_device_check_arguments(adev);
+ if (r)
+ return r;
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
@@ -2485,8 +2558,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->ring_lru_list);
spin_lock_init(&adev->ring_lru_list_lock);
- INIT_DELAYED_WORK(&adev->late_init_work,
- amdgpu_device_ip_late_init_func_handler);
+ INIT_DELAYED_WORK(&adev->delayed_init_work,
+ amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
@@ -2523,8 +2596,33 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->rio_mem == NULL)
DRM_INFO("PCI I/O BAR is not found.\n");
+ /* enable PCIE atomic ops */
+ r = pci_enable_atomic_ops_to_root(adev->pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (r) {
+ adev->have_atomics_support = false;
+ DRM_INFO("PCIE atomic ops is not supported\n");
+ } else {
+ adev->have_atomics_support = true;
+ }
+
amdgpu_device_get_pcie_info(adev);
+ if (amdgpu_mcbp)
+ DRM_INFO("MCBP is enabled\n");
+
+ if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
+ adev->enable_mes = true;
+
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
+ r = amdgpu_discovery_init(adev);
+ if (r) {
+ dev_err(adev->dev, "amdgpu_discovery_init failed\n");
+ return r;
+ }
+ }
+
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
@@ -2552,19 +2650,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto fence_driver_init;
}
- /* Read BIOS */
- if (!amdgpu_get_bios(adev)) {
- r = -EINVAL;
- goto failed;
- }
-
- r = amdgpu_atombios_init(adev);
- if (r) {
- dev_err(adev->dev, "amdgpu_atombios_init failed\n");
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
- goto failed;
- }
-
/* detect if we are with an SRIOV vbios */
amdgpu_device_detect_sriov_bios(adev);
@@ -2662,10 +2747,17 @@ fence_driver_init:
amdgpu_fbdev_init(adev);
+ if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
+ amdgpu_pm_virt_sysfs_init(adev);
+
r = amdgpu_pm_sysfs_init(adev);
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+ r = amdgpu_ucode_sysfs_init(adev);
+ if (r)
+ DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2706,7 +2798,21 @@ fence_driver_init:
}
/* must succeed. */
- amdgpu_ras_post_init(adev);
+ amdgpu_ras_resume(adev);
+
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+ r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+ if (r) {
+ dev_err(adev->dev, "Could not create pcie_replay_count");
+ return r;
+ }
+
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ r = amdgpu_pmu_init(adev);
+ if (r)
+ dev_err(adev->dev, "amdgpu_pmu_init failed\n");
return 0;
@@ -2749,7 +2855,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
- cancel_delayed_work_sync(&adev->late_init_work);
+ cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
@@ -2770,7 +2876,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
+ if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
+ amdgpu_pm_virt_sysfs_fini(adev);
+
amdgpu_debugfs_regs_cleanup(adev);
+ device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
+ amdgpu_ucode_sysfs_fini(adev);
+ if (IS_ENABLED(CONFIG_PERF_EVENTS))
+ amdgpu_pmu_fini(adev);
+ amdgpu_debugfs_preempt_cleanup(adev);
+ if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
+ amdgpu_discovery_fini(adev);
}
@@ -2810,7 +2926,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
- cancel_delayed_work_sync(&adev->late_init_work);
+ cancel_delayed_work_sync(&adev->delayed_init_work);
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
@@ -2851,6 +2967,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
amdgpu_amdkfd_suspend(adev);
+ amdgpu_ras_suspend(adev);
+
r = amdgpu_device_ip_suspend_phase1(adev);
/* evict vram memory */
@@ -2928,6 +3046,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (r)
return r;
+ queue_delayed_work(system_wq, &adev->delayed_init_work,
+ msecs_to_jiffies(AMDGPU_RESUME_MS));
+
if (!amdgpu_device_has_dc_support(adev)) {
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2951,7 +3072,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
return r;
/* Make sure IB tests flushed */
- flush_delayed_work(&adev->late_init_work);
+ flush_delayed_work(&adev->delayed_init_work);
/* blat the mode back in */
if (fbcon) {
@@ -2971,6 +3092,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_kms_helper_poll_enable(dev);
+ amdgpu_ras_resume(adev);
+
/*
* Most of the connector probing functions try to acquire runtime pm
* refs to ensure that the GPU is powered on when connector polling is
@@ -3335,8 +3458,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
- drm_sched_stop(&ring->sched);
-
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
@@ -3344,8 +3465,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if(job)
drm_sched_increase_karma(&job->base);
-
-
+ /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
@@ -3452,6 +3572,19 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
+ /*
+ * Add this ASIC as tracked as reset was already
+ * complete successfully.
+ */
+ amdgpu_register_gpu_instance(tmp_adev);
+
+ r = amdgpu_device_ip_late_init(tmp_adev);
+ if (r)
+ goto out;
+
+ /* must succeed. */
+ amdgpu_ras_resume(tmp_adev);
+
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@@ -3483,38 +3616,21 @@ end:
return r;
}
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
- struct amdgpu_job *job)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
- int i;
+ if (trylock) {
+ if (!mutex_trylock(&adev->lock_reset))
+ return false;
+ } else
+ mutex_lock(&adev->lock_reset);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
-
- if (!adev->asic_reset_res)
- drm_sched_resubmit_jobs(&ring->sched);
-
- drm_sched_start(&ring->sched, !adev->asic_reset_res);
- }
-
- if (!amdgpu_device_has_dc_support(adev)) {
- drm_helper_resume_force_mode(adev->ddev);
- }
-
- adev->asic_reset_res = 0;
-}
-
-static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
-{
- mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
/* Block kfd: SRIOV would do it separately */
if (!amdgpu_sriov_vf(adev))
amdgpu_amdkfd_pre_reset(adev);
+
+ return true;
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -3542,40 +3658,44 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
- int r;
+ struct list_head device_list, *device_list_handle = NULL;
+ bool need_full_reset, job_signaled;
struct amdgpu_hive_info *hive = NULL;
- bool need_full_reset = false;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head device_list, *device_list_handle = NULL;
+ int i, r = 0;
+ need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
dev_info(adev->dev, "GPU reset begin!\n");
+ cancel_delayed_work_sync(&adev->delayed_init_work);
+
+ hive = amdgpu_get_xgmi_hive(adev, false);
+
/*
- * In case of XGMI hive disallow concurrent resets to be triggered
- * by different nodes. No point also since the one node already executing
- * reset will also reset all the other nodes in the hive.
+ * Here we trylock to avoid chain of resets executing from
+ * either trigger by jobs on different adevs in XGMI hive or jobs on
+ * different schedulers for same device while this TO handler is running.
+ * We always reset all schedulers for device and all devices for XGMI
+ * hive so that should take care of them too.
*/
- hive = amdgpu_get_xgmi_hive(adev, 0);
- if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
- !mutex_trylock(&hive->reset_lock))
+
+ if (hive && !mutex_trylock(&hive->reset_lock)) {
+ DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+ job->base.id, hive->hive_id);
return 0;
+ }
/* Start with adev pre asic reset first for soft reset check.*/
- amdgpu_device_lock_adev(adev);
- r = amdgpu_device_pre_asic_reset(adev,
- job,
- &need_full_reset);
- if (r) {
- /*TODO Should we stop ?*/
- DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
- r, adev->ddev->unique);
- adev->asic_reset_res = r;
+ if (!amdgpu_device_lock_adev(adev, !hive)) {
+ DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+ job->base.id);
+ return 0;
}
/* Build list of devices to reset */
- if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
amdgpu_device_unlock_adev(adev);
return -ENODEV;
@@ -3592,13 +3712,67 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
+ /*
+ * Mark these ASICs to be reseted as untracked first
+ * And add them back after reset completed
+ */
+ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
+ amdgpu_unregister_gpu_instance(tmp_adev);
+
+ /* block all schedulers and reset given job's ring */
+ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ /* disable ras on ALL IPs */
+ if (amdgpu_device_ip_need_full_reset(tmp_adev))
+ amdgpu_ras_suspend(tmp_adev);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ drm_sched_stop(&ring->sched, &job->base);
+ }
+ }
+
+
+ /*
+ * Must check guilty signal here since after this point all old
+ * HW fences are force signaled.
+ *
+ * job->base holds a reference to parent fence
+ */
+ if (job && job->base.s_fence->parent &&
+ dma_fence_is_signaled(job->base.s_fence->parent))
+ job_signaled = true;
+
+ if (!amdgpu_device_ip_need_full_reset(adev))
+ device_list_handle = &device_list;
+
+ if (job_signaled) {
+ dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+ goto skip_hw_reset;
+ }
+
+
+ /* Guilty job will be freed after this*/
+ r = amdgpu_device_pre_asic_reset(adev,
+ job,
+ &need_full_reset);
+ if (r) {
+ /*TODO Should we stop ?*/
+ DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
+ r, adev->ddev->unique);
+ adev->asic_reset_res = r;
+ }
+
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (tmp_adev == adev)
continue;
- amdgpu_device_lock_adev(tmp_adev);
+ amdgpu_device_lock_adev(tmp_adev, false);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
@@ -3622,9 +3796,28 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
goto retry;
}
+skip_hw_reset:
+
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+ if (!ring || !ring->sched.thread)
+ continue;
+
+ /* No point to resubmit jobs if we didn't HW reset*/
+ if (!tmp_adev->asic_reset_res && !job_signaled)
+ drm_sched_resubmit_jobs(&ring->sched);
+
+ drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+ }
+
+ if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+ drm_helper_resume_force_mode(tmp_adev->ddev);
+ }
+
+ tmp_adev->asic_reset_res = 0;
if (r) {
/* bad news, how to tell it to userspace ? */
@@ -3637,7 +3830,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
amdgpu_device_unlock_adev(tmp_adev);
}
- if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+ if (hive)
mutex_unlock(&hive->reset_lock);
if (r)
@@ -3645,43 +3838,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
return r;
}
-static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
- enum pci_bus_speed *speed,
- enum pcie_link_width *width)
-{
- struct pci_dev *pdev = adev->pdev;
- enum pci_bus_speed cur_speed;
- enum pcie_link_width cur_width;
- u32 ret = 1;
-
- *speed = PCI_SPEED_UNKNOWN;
- *width = PCIE_LNK_WIDTH_UNKNOWN;
-
- while (pdev) {
- cur_speed = pcie_get_speed_cap(pdev);
- cur_width = pcie_get_width_cap(pdev);
- ret = pcie_bandwidth_available(adev->pdev, NULL,
- NULL, &cur_width);
- if (!ret)
- cur_width = PCIE_LNK_WIDTH_RESRV;
-
- if (cur_speed != PCI_SPEED_UNKNOWN) {
- if (*speed == PCI_SPEED_UNKNOWN)
- *speed = cur_speed;
- else if (cur_speed < *speed)
- *speed = cur_speed;
- }
-
- if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
- if (*width == PCIE_LNK_WIDTH_UNKNOWN)
- *width = cur_width;
- else if (cur_width < *width)
- *width = cur_width;
- }
- pdev = pci_upstream_bridge(pdev);
- }
-}
-
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
@@ -3715,8 +3871,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
- amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
- &platform_link_width);
+ pcie_bandwidth_available(adev->pdev, NULL,
+ &platform_speed_cap, &platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
new file mode 100644
index 000000000000..e049ae6a76fb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_discovery.h"
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "discovery.h"
+
+#define mmRCC_CONFIG_MEMSIZE 0xde3
+#define mmMM_INDEX 0x0
+#define mmMM_INDEX_HI 0x6
+#define mmMM_DATA 0x1
+#define HW_ID_MAX 300
+
+const char *hw_id_names[HW_ID_MAX] = {
+ [MP1_HWID] = "MP1",
+ [MP2_HWID] = "MP2",
+ [THM_HWID] = "THM",
+ [SMUIO_HWID] = "SMUIO",
+ [FUSE_HWID] = "FUSE",
+ [CLKA_HWID] = "CLKA",
+ [PWR_HWID] = "PWR",
+ [GC_HWID] = "GC",
+ [UVD_HWID] = "UVD",
+ [AUDIO_AZ_HWID] = "AUDIO_AZ",
+ [ACP_HWID] = "ACP",
+ [DCI_HWID] = "DCI",
+ [DMU_HWID] = "DMU",
+ [DCO_HWID] = "DCO",
+ [DIO_HWID] = "DIO",
+ [XDMA_HWID] = "XDMA",
+ [DCEAZ_HWID] = "DCEAZ",
+ [DAZ_HWID] = "DAZ",
+ [SDPMUX_HWID] = "SDPMUX",
+ [NTB_HWID] = "NTB",
+ [IOHC_HWID] = "IOHC",
+ [L2IMU_HWID] = "L2IMU",
+ [VCE_HWID] = "VCE",
+ [MMHUB_HWID] = "MMHUB",
+ [ATHUB_HWID] = "ATHUB",
+ [DBGU_NBIO_HWID] = "DBGU_NBIO",
+ [DFX_HWID] = "DFX",
+ [DBGU0_HWID] = "DBGU0",
+ [DBGU1_HWID] = "DBGU1",
+ [OSSSYS_HWID] = "OSSSYS",
+ [HDP_HWID] = "HDP",
+ [SDMA0_HWID] = "SDMA0",
+ [SDMA1_HWID] = "SDMA1",
+ [ISP_HWID] = "ISP",
+ [DBGU_IO_HWID] = "DBGU_IO",
+ [DF_HWID] = "DF",
+ [CLKB_HWID] = "CLKB",
+ [FCH_HWID] = "FCH",
+ [DFX_DAP_HWID] = "DFX_DAP",
+ [L1IMU_PCIE_HWID] = "L1IMU_PCIE",
+ [L1IMU_NBIF_HWID] = "L1IMU_NBIF",
+ [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR",
+ [L1IMU3_HWID] = "L1IMU3",
+ [L1IMU4_HWID] = "L1IMU4",
+ [L1IMU5_HWID] = "L1IMU5",
+ [L1IMU6_HWID] = "L1IMU6",
+ [L1IMU7_HWID] = "L1IMU7",
+ [L1IMU8_HWID] = "L1IMU8",
+ [L1IMU9_HWID] = "L1IMU9",
+ [L1IMU10_HWID] = "L1IMU10",
+ [L1IMU11_HWID] = "L1IMU11",
+ [L1IMU12_HWID] = "L1IMU12",
+ [L1IMU13_HWID] = "L1IMU13",
+ [L1IMU14_HWID] = "L1IMU14",
+ [L1IMU15_HWID] = "L1IMU15",
+ [WAFLC_HWID] = "WAFLC",
+ [FCH_USB_PD_HWID] = "FCH_USB_PD",
+ [PCIE_HWID] = "PCIE",
+ [PCS_HWID] = "PCS",
+ [DDCL_HWID] = "DDCL",
+ [SST_HWID] = "SST",
+ [IOAGR_HWID] = "IOAGR",
+ [NBIF_HWID] = "NBIF",
+ [IOAPIC_HWID] = "IOAPIC",
+ [SYSTEMHUB_HWID] = "SYSTEMHUB",
+ [NTBCCP_HWID] = "NTBCCP",
+ [UMC_HWID] = "UMC",
+ [SATA_HWID] = "SATA",
+ [USB_HWID] = "USB",
+ [CCXSEC_HWID] = "CCXSEC",
+ [XGMI_HWID] = "XGMI",
+ [XGBE_HWID] = "XGBE",
+ [MP0_HWID] = "MP0",
+};
+
+static int hw_id_map[MAX_HWIP] = {
+ [GC_HWIP] = GC_HWID,
+ [HDP_HWIP] = HDP_HWID,
+ [SDMA0_HWIP] = SDMA0_HWID,
+ [SDMA1_HWIP] = SDMA1_HWID,
+ [MMHUB_HWIP] = MMHUB_HWID,
+ [ATHUB_HWIP] = ATHUB_HWID,
+ [NBIO_HWIP] = NBIF_HWID,
+ [MP0_HWIP] = MP0_HWID,
+ [MP1_HWIP] = MP1_HWID,
+ [UVD_HWIP] = UVD_HWID,
+ [VCE_HWIP] = VCE_HWID,
+ [DF_HWIP] = DF_HWID,
+ [DCE_HWIP] = DCEAZ_HWID,
+ [OSSSYS_HWIP] = OSSSYS_HWID,
+ [SMUIO_HWIP] = SMUIO_HWID,
+ [PWR_HWIP] = PWR_HWID,
+ [NBIF_HWIP] = NBIF_HWID,
+ [THM_HWIP] = THM_HWID,
+ [CLK_HWIP] = CLKA_HWID,
+};
+
+static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
+{
+ uint32_t *p = (uint32_t *)binary;
+ uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
+ uint64_t pos = vram_size - BINARY_MAX_SIZE;
+ unsigned long flags;
+
+ while (pos < vram_size) {
+ spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+ WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+ WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+ *p++ = RREG32_NO_KIQ(mmMM_DATA);
+ spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+ pos += 4;
+ }
+
+ return 0;
+}
+
+static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
+{
+ uint16_t checksum = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ checksum += data[i];
+
+ return checksum;
+}
+
+static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
+ uint16_t expected)
+{
+ return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
+}
+
+int amdgpu_discovery_init(struct amdgpu_device *adev)
+{
+ struct table_info *info;
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct gpu_info_header *ghdr;
+ uint16_t offset;
+ uint16_t size;
+ uint16_t checksum;
+ int r;
+
+ adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL);
+ if (!adev->discovery)
+ return -ENOMEM;
+
+ r = amdgpu_discovery_read_binary(adev, adev->discovery);
+ if (r) {
+ DRM_ERROR("failed to read ip discovery binary\n");
+ goto out;
+ }
+
+ bhdr = (struct binary_header *)adev->discovery;
+
+ if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
+ DRM_ERROR("invalid ip discovery binary signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ offset = offsetof(struct binary_header, binary_checksum) +
+ sizeof(bhdr->binary_checksum);
+ size = bhdr->binary_size - offset;
+ checksum = bhdr->binary_checksum;
+
+ if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ size, checksum)) {
+ DRM_ERROR("invalid ip discovery binary checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ info = &bhdr->table_list[IP_DISCOVERY];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+ ihdr = (struct ip_discovery_header *)(adev->discovery + offset);
+
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ DRM_ERROR("invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ ihdr->size, checksum)) {
+ DRM_ERROR("invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ info = &bhdr->table_list[GC];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+ ghdr = (struct gpu_info_header *)(adev->discovery + offset);
+
+ if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
+ ghdr->size, checksum)) {
+ DRM_ERROR("invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ kfree(adev->discovery);
+ adev->discovery = NULL;
+
+ return r;
+}
+
+void amdgpu_discovery_fini(struct amdgpu_device *adev)
+{
+ kfree(adev->discovery);
+ adev->discovery = NULL;
+}
+
+int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct ip *ip;
+ uint16_t die_offset;
+ uint16_t ip_offset;
+ uint16_t num_dies;
+ uint16_t num_ips;
+ uint8_t num_base_address;
+ int hw_ip;
+ int i, j, k;
+
+ if (!adev->discovery) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->discovery;
+ ihdr = (struct ip_discovery_header *)(adev->discovery +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ DRM_DEBUG("number of dies: %d\n", num_dies);
+
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(adev->discovery + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ if (le16_to_cpu(dhdr->die_id) != i) {
+ DRM_ERROR("invalid die id %d, expected %d\n",
+ le16_to_cpu(dhdr->die_id), i);
+ return -EINVAL;
+ }
+
+ DRM_DEBUG("number of hardware IPs on die%d: %d\n",
+ le16_to_cpu(dhdr->die_id), num_ips);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip *)(adev->discovery + ip_offset);
+ num_base_address = ip->num_base_address;
+
+ DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)],
+ le16_to_cpu(ip->hw_id),
+ ip->number_instance,
+ ip->major, ip->minor,
+ ip->revision);
+
+ for (k = 0; k < num_base_address; k++) {
+ /*
+ * convert the endianness of base addresses in place,
+ * so that we don't need to convert them when accessing adev->reg_offset.
+ */
+ ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
+ DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
+ }
+
+ for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
+ if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
+ DRM_INFO("set register base offset for %s\n",
+ hw_id_names[le16_to_cpu(ip->hw_id)]);
+ adev->reg_offset[hw_ip][ip->number_instance] =
+ ip->base_address;
+ }
+
+ }
+
+ ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ }
+ }
+
+ return 0;
+}
+
+int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
+ int *major, int *minor)
+{
+ struct binary_header *bhdr;
+ struct ip_discovery_header *ihdr;
+ struct die_header *dhdr;
+ struct ip *ip;
+ uint16_t die_offset;
+ uint16_t ip_offset;
+ uint16_t num_dies;
+ uint16_t num_ips;
+ int i, j;
+
+ if (!adev->discovery) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->discovery;
+ ihdr = (struct ip_discovery_header *)(adev->discovery +
+ le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+ num_dies = le16_to_cpu(ihdr->num_dies);
+
+ for (i = 0; i < num_dies; i++) {
+ die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+ dhdr = (struct die_header *)(adev->discovery + die_offset);
+ num_ips = le16_to_cpu(dhdr->num_ips);
+ ip_offset = die_offset + sizeof(*dhdr);
+
+ for (j = 0; j < num_ips; j++) {
+ ip = (struct ip *)(adev->discovery + ip_offset);
+
+ if (le16_to_cpu(ip->hw_id) == hw_id) {
+ if (major)
+ *major = ip->major;
+ if (minor)
+ *minor = ip->minor;
+ return 0;
+ }
+ ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ }
+ }
+
+ return -EINVAL;
+}
+
+int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ struct gc_info_v1_0 *gc_info;
+
+ if (!adev->discovery) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->discovery;
+ gc_info = (struct gc_info_v1_0 *)(adev->discovery +
+ le16_to_cpu(bhdr->table_list[GC].offset));
+
+ adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
+ adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
+ le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
+ adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
+ adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
+ adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
+ adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
+ adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
+ adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
+ adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
+ adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
+ adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
+ adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
+ le32_to_cpu(gc_info->gc_num_sa_per_se);
+ adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
new file mode 100644
index 000000000000..85b8c4d4d576
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DISCOVERY__
+#define __AMDGPU_DISCOVERY__
+
+int amdgpu_discovery_init(struct amdgpu_device *adev);
+void amdgpu_discovery_fini(struct amdgpu_device *adev);
+int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
+int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
+ int *major, int *minor);
+int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
+
+#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b083b219b1a9..535650967b1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_i2c.h"
@@ -32,11 +32,13 @@
#include "amdgpu_display.h"
#include <asm/div64.h>
+#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_vblank.h>
static void amdgpu_display_flip_callback(struct dma_fence *f,
struct dma_fence_cb *cb)
@@ -631,10 +633,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
amdgpu_dither_enum_list, sz);
if (amdgpu_device_has_dc_support(adev)) {
- adev->mode_info.max_bpc_property =
- drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
- if (!adev->mode_info.max_bpc_property)
- return -ENOMEM;
adev->mode_info.abm_level_property =
drm_property_create_range(adev->ddev, 0,
"abm level", 0, 4);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index a38e0fb4a6fe..489041df1f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012 Advanced Micro Devices, Inc.
+ * Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -31,8 +31,6 @@
* objects between different devices via PRIME <prime_buffer_sharing>`.
*/
-#include <drm/drmP.h>
-
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
@@ -103,7 +101,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
* Returns:
* 0 on success or a negative error code on failure.
*/
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+ struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -137,57 +136,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
-/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
- * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
- *
- * Imports shared DMA buffer memory exported by another device.
- *
- * Returns:
- * A new GEM BO of the given DRM device, representing the memory
- * described by the given DMA-buf attachment and scatter/gather table.
- */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg)
-{
- struct reservation_object *resv = attach->dmabuf->resv;
- struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_bo *bo;
- struct amdgpu_bo_param bp;
- int ret;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = attach->dmabuf->size;
- bp.byte_align = PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_CPU;
- bp.flags = 0;
- bp.type = ttm_bo_type_sg;
- bp.resv = resv;
- ww_mutex_lock(&resv->lock, NULL);
- ret = amdgpu_bo_create(adev, &bp, &bo);
- if (ret)
- goto error;
-
- bo->tbo.sg = sg;
- bo->tbo.ttm->sg = sg;
- bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
- bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
-
- ww_mutex_unlock(&resv->lock);
- return &bo->gem_base;
-
-error:
- ww_mutex_unlock(&resv->lock);
- return ERR_PTR(ret);
-}
-
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
@@ -231,7 +179,7 @@ err_fences_put:
}
/**
- * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
+ * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
@@ -242,8 +190,8 @@ err_fences_put:
* Returns:
* 0 on success or a negative error code on failure.
*/
-static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -291,15 +239,15 @@ error_detach:
}
/**
- * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
+ * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
-static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
- struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -334,7 +282,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
}
/**
- * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
@@ -345,8 +293,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
* Returns:
* 0 on success or a negative error code on failure.
*/
-static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
- enum dma_data_direction direction)
+static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -374,12 +322,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
- .attach = amdgpu_gem_map_attach,
- .detach = amdgpu_gem_map_detach,
+ .attach = amdgpu_dma_buf_map_attach,
+ .detach = amdgpu_dma_buf_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
- .begin_cpu_access = amdgpu_gem_begin_cpu_access,
+ .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
@@ -418,6 +366,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
}
/**
+ * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
+ * implementation
+ * @dev: DRM device
+ * @attach: DMA-buf attachment
+ * @sg: Scatter/gather table
+ *
+ * Imports shared DMA buffer memory exported by another device.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sg)
+{
+ struct reservation_object *resv = attach->dmabuf->resv;
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_bo *bo;
+ struct amdgpu_bo_param bp;
+ int ret;
+
+ memset(&bp, 0, sizeof(bp));
+ bp.size = attach->dmabuf->size;
+ bp.byte_align = PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+ bp.flags = 0;
+ bp.type = ttm_bo_type_sg;
+ bp.resv = resv;
+ ww_mutex_lock(&resv->lock, NULL);
+ ret = amdgpu_bo_create(adev, &bp, &bo);
+ if (ret)
+ goto error;
+
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ bo->prime_shared_count = 1;
+
+ ww_mutex_unlock(&resv->lock);
+ return &bo->gem_base;
+
+error:
+ ww_mutex_unlock(&resv->lock);
+ return ERR_PTR(ret);
+}
+
+/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device
* @dma_buf: Shared DMA buffer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000000..c7056cbe8685
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DMA_BUF_H__
+#define __AMDGPU_DMA_BUF_H__
+
+#include <drm/drm_gem.h>
+
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *gobj,
+ int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+ struct vm_area_struct *vma);
+
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 68959b923f89..790263dcc064 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -51,6 +51,7 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_start;
uint32_t userqueue_end;
uint32_t gfx_ring0;
+ uint32_t gfx_ring1;
uint32_t sdma_engine[8];
uint32_t ih;
union {
@@ -153,6 +154,45 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
+typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
+{
+ /* Compute + GFX: 0~255 */
+ AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
+ AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
+ AMDGPU_NAVI10_DOORBELL_DIQ = 0x002,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
+ AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B,
+ AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
+ AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
+ /* SDMA:256~335*/
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
+ AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
+ /* IH: 376~391 */
+ AMDGPU_NAVI10_DOORBELL_IH = 0x178,
+ /* MMSCH: 392~407
+ * overlap the doorbell assignment with VCN as they are mutually exclusive
+ * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD
+ */
+ AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */
+ AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189,
+ AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
+ AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
+
+ AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
+ AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7,
+
+ AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
+ AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
+} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT;
+
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 523b8ab6b04e..61bd10310604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -22,7 +22,6 @@
* Authors: Alex Deucher
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_i2c.h"
@@ -907,16 +906,63 @@ amdgpu_get_vce_clock_state(void *handle, u32 idx)
int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
{
- if (is_support_sw_smu(adev))
- return smu_get_sclk(&adev->smu, low);
- else
+ uint32_t clk_freq;
+ int ret = 0;
+ if (is_support_sw_smu(adev)) {
+ ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
+ low ? &clk_freq : NULL,
+ !low ? &clk_freq : NULL);
+ if (ret)
+ return 0;
+ return clk_freq * 100;
+
+ } else {
return (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (low));
+ }
}
int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
{
- if (is_support_sw_smu(adev))
- return smu_get_mclk(&adev->smu, low);
- else
+ uint32_t clk_freq;
+ int ret = 0;
+ if (is_support_sw_smu(adev)) {
+ ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
+ low ? &clk_freq : NULL,
+ !low ? &clk_freq : NULL);
+ if (ret)
+ return 0;
+ return clk_freq * 100;
+
+ } else {
return (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (low));
+ }
+}
+
+int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate)
+{
+ int ret = 0;
+ bool swsmu = is_support_sw_smu(adev);
+
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_VCE:
+ if (swsmu)
+ ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
+ else
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ (adev)->powerplay.pp_handle, block_type, gate));
+ break;
+ case AMD_IP_BLOCK_TYPE_GMC:
+ case AMD_IP_BLOCK_TYPE_ACP:
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
+ (adev)->powerplay.pp_handle, block_type, gate));
+ break;
+ default:
+ break;
+ }
+
+ return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..1c5c0fd76dbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
int min_temp;
/* high temperature threshold */
int max_temp;
+ /* edge max emergency(shutdown) temp */
+ int max_edge_emergency_temp;
+ /* hotspot low temperature threshold */
+ int min_hotspot_temp;
+ /* hotspot high temperature critical threshold */
+ int max_hotspot_crit_temp;
+ /* hotspot max emergency(shutdown) temp */
+ int max_hotspot_emergency_temp;
+ /* memory low temperature threshold */
+ int min_mem_temp;
+ /* memory high temperature critical threshold */
+ int max_mem_crit_temp;
+ /* memory max emergency(shutdown) temp */
+ int max_mem_emergency_temp;
/* was last interrupt low to high or high to low */
bool high_to_low;
/* interrupt source */
@@ -341,10 +355,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
-#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \
- ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\
- (adev)->powerplay.pp_handle, block_type, gate))
-
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@@ -506,6 +516,9 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx);
+int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev,
+ uint32_t block_type, bool gate);
+
extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1e2cc9d68a05..1b0613c7cf95 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -22,21 +22,23 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
+#include <drm/drm_vblank.h>
#include "amdgpu_drv.h"
#include <drm/drm_pciids.h>
#include <linux/console.h>
#include <linux/module.h>
+#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
-#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
#include "amdgpu_amdkfd.h"
@@ -76,11 +78,14 @@
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
* - 3.31.0 - Add support for per-flip tiling attribute changes with DC
* - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
+ * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 32
+#define KMS_DRIVER_MINOR 33
#define KMS_DRIVER_PATCHLEVEL 0
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
+
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@@ -93,7 +98,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
-int amdgpu_lockup_timeout = 10000;
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@@ -106,7 +111,6 @@ int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
-int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_dc = -1;
@@ -134,6 +138,10 @@ int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
/* FBC (bit 0) disabled by default*/
uint amdgpu_dc_feature_mask = 0;
+int amdgpu_async_gfx_ring = 1;
+int amdgpu_mcbp = 0;
+int amdgpu_discovery = 0;
+int amdgpu_mes = 0;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -227,16 +235,28 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
- * DOC: lockup_timeout (int)
- * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
- * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
- */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
-module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
+ * DOC: lockup_timeout (string)
+ * Set GPU scheduler timeout value in ms.
+ *
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
+ * multiple values specified. 0 and negative values are invalidated. They will be adjusted
+ * to default timeout.
+ * - With one value specified, the setting will apply to all non-compute jobs.
+ * - With multiple values specified, the first one will be for GFX. The second one is for Compute.
+ * And the third and fourth ones are for SDMA and Video.
+ * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
+ * jobs is 10000. And there is no timeout enforced on compute jobs.
+ */
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs."
+ " 0: keep default value. negative: infinity timeout), "
+ "format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
/**
* DOC: dpm (int)
- * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
+ * Override for dynamic power management setting
+ * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20)
+ * The default is -1 (auto).
*/
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
@@ -332,13 +352,6 @@ MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
/**
- * DOC: vram_page_split (int)
- * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512.
- */
-MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
-module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
-
-/**
* DOC: exp_hw_support (int)
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
@@ -561,6 +574,39 @@ MODULE_PARM_DESC(smu_memory_pool_size,
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+/**
+ * DOC: async_gfx_ring (int)
+ * It is used to enable gfx rings that could be configured with different prioritites or equal priorities
+ */
+MODULE_PARM_DESC(async_gfx_ring,
+ "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))");
+module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
+
+/**
+ * DOC: mcbp (int)
+ * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mcbp,
+ "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)");
+module_param_named(mcbp, amdgpu_mcbp, int, 0444);
+
+/**
+ * DOC: discovery (int)
+ * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
+ */
+MODULE_PARM_DESC(discovery,
+ "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
+module_param_named(discovery, amdgpu_discovery, int, 0444);
+
+/**
+ * DOC: mes (int)
+ * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes,
+ "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
+module_param_named(mes, amdgpu_mes, int, 0444);
+
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
@@ -655,6 +701,24 @@ MODULE_PARM_DESC(noretry,
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Whether HWS support gws barriers. Default value: false (not supported)
+ * This will be replaced with a MEC firmware version check once firmware
+ * is ready
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
+
+/**
+ * DOC: queue_preemption_timeout_ms (int)
+ * queue preemption timeout in ms (1 = Minimum, 9000 = default)
+ */
+int queue_preemption_timeout_ms = 9000;
+module_param(queue_preemption_timeout_ms, int, 0644);
+MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
#endif
/**
@@ -665,6 +729,22 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
+/**
+ * DOC: abmlevel (uint)
+ * Override the default ABM (Adaptive Backlight Management) level used for DC
+ * enabled hardware. Requires DMCU to be supported and loaded.
+ * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by
+ * default. Values 1-4 control the maximum allowable brightness reduction via
+ * the ABM algorithm, with 1 being the least reduction and 4 being the most
+ * reduction.
+ *
+ * Defaults to 0, or disabled. Userspace can still override this level later
+ * after boot.
+ */
+uint amdgpu_dm_abm_level = 0;
+MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
+module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -921,6 +1001,14 @@ static const struct pci_device_id pciidlist[] = {
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ /* Navi10 */
+ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
+ {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0, 0, 0}
};
@@ -1216,6 +1304,66 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+ char *input = amdgpu_lockup_timeout;
+ char *timeout_setting = NULL;
+ int index = 0;
+ long timeout;
+ int ret = 0;
+
+ /*
+ * By default timeout for non compute jobs is 10000.
+ * And there is no timeout enforced on compute jobs.
+ */
+ adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+ strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+ ret = kstrtol(timeout_setting, 0, &timeout);
+ if (ret)
+ return ret;
+
+ if (timeout == 0) {
+ index++;
+ continue;
+ } else if (timeout < 0) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ } else {
+ timeout = msecs_to_jiffies(timeout);
+ }
+
+ switch (index++) {
+ case 0:
+ adev->gfx_timeout = timeout;
+ break;
+ case 1:
+ adev->compute_timeout = timeout;
+ break;
+ case 2:
+ adev->sdma_timeout = timeout;
+ break;
+ case 3:
+ adev->video_timeout = timeout;
+ break;
+ default:
+ break;
+ }
+ }
+ /*
+ * There is only one value specified and
+ * it should apply to all non-compute jobs.
+ */
+ if (index == 1)
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+ }
+
+ return ret;
+}
+
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index ec78e2b2015c..571a6dfb473e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index e47609218839..eb3569b46c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -23,22 +23,22 @@
* Authors:
* David Airlie
*/
+
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/vga_switcheroo.h>
-#include <drm/drmP.h>
+#include <drm/amdgpu_drm.h>
#include <drm/drm_crtc.h>
#include <drm/drm_crtc_helper.h>
-#include <drm/amdgpu_drm.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+
#include "amdgpu.h"
#include "cikd.h"
#include "amdgpu_gem.h"
-#include <drm/drm_fb_helper.h>
-
-#include <linux/vga_switcheroo.h>
-
#include "amdgpu_display.h"
/* object hierarchy -
@@ -121,6 +121,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object **gobj_p)
{
+ const struct drm_format_info *info;
struct amdgpu_device *adev = rfbdev->adev;
struct drm_gem_object *gobj = NULL;
struct amdgpu_bo *abo = NULL;
@@ -131,7 +132,8 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
int height = mode_cmd->height;
u32 cpp;
- cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0);
+ info = drm_get_format_info(adev->ddev, mode_cmd);
+ cpp = info->cpp[0];
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4dee2326b29c..23085b352cf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -34,7 +34,9 @@
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -427,9 +429,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission)
{
+ struct amdgpu_device *adev = ring->adev;
long timeout;
int r;
+ if (!adev)
+ return -EINVAL;
+
/* Check that num_hw_submission is a power of two */
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
return -EINVAL;
@@ -451,12 +457,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
/* No need to setup the GPU scheduler for KIQ ring */
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
- /* for non-sriov case, no timeout enforce on compute ring */
- if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
- && !amdgpu_sriov_vf(ring->adev))
- timeout = MAX_SCHEDULE_TIMEOUT;
- else
- timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ /*
+ * For non-sriov case, no timeout enforce
+ * on compute ring by default. Unless user
+ * specifies a timeout for compute ring.
+ *
+ * For sriov case, always use the timeout
+ * as gfx ring
+ */
+ if (!amdgpu_sriov_vf(ring->adev))
+ timeout = adev->compute_timeout;
+ else
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
@@ -684,22 +709,30 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
- seq_printf(m, "Last signaled fence 0x%08x\n",
+ seq_printf(m, "Last signaled fence 0x%08x\n",
atomic_read(&ring->fence_drv.last_seq));
- seq_printf(m, "Last emitted 0x%08x\n",
+ seq_printf(m, "Last emitted 0x%08x\n",
ring->fence_drv.sync_seq);
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
+ ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
+ seq_printf(m, "Last signaled trailing fence 0x%08x\n",
+ le32_to_cpu(*ring->trail_fence_cpu_addr));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->trail_seq);
+ }
+
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
continue;
/* set in CP_VMID_PREEMPT and preemption occurred */
- seq_printf(m, "Last preempted 0x%08x\n",
+ seq_printf(m, "Last preempted 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
/* set in CP_VMID_RESET and reset occurred */
- seq_printf(m, "Last reset 0x%08x\n",
+ seq_printf(m, "Last reset 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
/* Both preemption and reset occurred */
- seq_printf(m, "Last both 0x%08x\n",
+ seq_printf(m, "Last both 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d11e1721147..d79ab1da9e07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -25,7 +25,10 @@
* Alex Deucher
* Jerome Glisse
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
#include <drm/amdgpu_drm.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f5734d985..df8a23554831 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -27,26 +27,12 @@
struct amdgpu_ring;
struct amdgpu_bo;
-struct amdgpu_gds_asic_info {
- uint32_t total_size;
- uint32_t gfx_partition_size;
- uint32_t cs_partition_size;
-};
-
struct amdgpu_gds {
- struct amdgpu_gds_asic_info mem;
- struct amdgpu_gds_asic_info gws;
- struct amdgpu_gds_asic_info oa;
- uint32_t gds_compute_max_wave_id;
-
- /* At present, GDS, GWS and OA resources for gfx (graphics)
- * is always pre-allocated and available for graphics operation.
- * Such resource is shared between all gfx clients.
- * TODO: move this operation to user space
- * */
- struct amdgpu_bo* gds_gfx_bo;
- struct amdgpu_bo* gws_gfx_bo;
- struct amdgpu_bo* oa_gfx_bo;
+ uint32_t gds_size;
+ uint32_t gws_size;
+ uint32_t oa_size;
+ uint32_t gds_compute_max_wave_id;
+ uint32_t vgt_gs_max_wave_id;
};
struct amdgpu_gds_reg_offset {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d4fcf5475464..939f8305511b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,9 +26,13 @@
* Jerome Glisse
*/
#include <linux/ktime.h>
+#include <linux/module.h>
#include <linux/pagemap.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_xgmi.h"
@@ -171,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
@@ -323,33 +327,30 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
}
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
- r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
- bo->tbo.ttm->pages);
+ r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (r)
goto release_object;
r = amdgpu_bo_reserve(bo, true);
if (r)
- goto free_pages;
+ goto user_pages_done;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
- goto free_pages;
+ goto user_pages_done;
}
r = drm_gem_handle_create(filp, gobj, &handle);
- /* drop reference from allocate - handle holds it now */
- drm_gem_object_put_unlocked(gobj);
if (r)
- return r;
+ goto user_pages_done;
args->handle = handle;
- return 0;
-free_pages:
- release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
release_object:
drm_gem_object_put_unlocked(gobj);
@@ -610,7 +611,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
- r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+ r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false);
if (r)
goto error_unref;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f1ddfc50bcc7..b8ba6e27c61f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gobj,
- int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
- struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
-extern const struct dma_buf_ops amdgpu_dmabuf_ops;
/*
* GEM objects.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 997932ebbb83..74066e1466f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
@@ -34,8 +34,8 @@
* GPU GFX IP block helpers function.
*/
-int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
- int pipe, int queue)
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue)
{
int bit = 0;
@@ -47,8 +47,8 @@ int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
return bit;
}
-void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
- int *mec, int *pipe, int *queue)
+void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
@@ -61,10 +61,40 @@ void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
- return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
+ return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
+int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ int bit = 0;
+
+ bit += me * adev->gfx.me.num_pipe_per_me
+ * adev->gfx.me.num_queue_per_pipe;
+ bit += pipe * adev->gfx.me.num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
+ int *me, int *pipe, int *queue)
+{
+ *queue = bit % adev->gfx.me.num_queue_per_pipe;
+ *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
+ % adev->gfx.me.num_pipe_per_me;
+ *me = (bit / adev->gfx.me.num_queue_per_pipe)
+ / adev->gfx.me.num_pipe_per_me;
+}
+
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
+ int me, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
+ adev->gfx.me.queue_bitmap);
+}
+
/**
* amdgpu_gfx_scratch_get - Allocate a scratch register
*
@@ -199,6 +229,30 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe, me;
+
+ for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
+ queue = i % adev->gfx.me.num_queue_per_pipe;
+ pipe = (i / adev->gfx.me.num_queue_per_pipe)
+ % adev->gfx.me.num_pipe_per_me;
+ me = (i / adev->gfx.me.num_queue_per_pipe)
+ / adev->gfx.me.num_pipe_per_me;
+
+ if (me >= adev->gfx.me.num_me)
+ break;
+ /* policy: amdgpu owns the first queue per pipe at this stage
+ * will extend to mulitple queues per pipe later */
+ if (me == 0 && queue < 1)
+ set_bit(i, adev->gfx.me.queue_bitmap);
+ }
+
+ /* update the number of active graphics rings */
+ adev->gfx.num_gfx_rings =
+ bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+}
+
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -213,7 +267,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
continue;
- amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
+ amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
/*
* 1. Using pipes 2/3 from MEC 2 seems cause problems.
@@ -306,9 +360,9 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
return 0;
}
-/* create MQD for each compute queue */
-int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size)
+/* create MQD for each compute/gfx queue */
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned mqd_size)
{
struct amdgpu_ring *ring = NULL;
int r, i;
@@ -335,6 +389,27 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
+ if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ /* create MQD for each KGQ */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ if (!ring->mqd_obj) {
+ r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
+ &ring->mqd_gpu_addr, &ring->mqd_ptr);
+ if (r) {
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
+ return r;
+ }
+
+ /* prepare MQD backup */
+ adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
+ if (!adev->gfx.me.mqd_backup[i])
+ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
+ }
+ }
+ }
+
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -343,7 +418,7 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
- dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
+ dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
@@ -357,11 +432,21 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
int i;
+ if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ kfree(adev->gfx.me.mqd_backup[i]);
+ amdgpu_bo_free_kernel(&ring->mqd_obj,
+ &ring->mqd_gpu_addr,
+ &ring->mqd_ptr);
+ }
+ }
+
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
kfree(adev->gfx.mec.mqd_backup[i]);
@@ -371,12 +456,81 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
}
ring = &adev->gfx.kiq.ring;
+ if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring)
+ kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_compute_rings))
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
+ RESET_QUEUES, 0, 0);
+
+ return amdgpu_ring_test_ring(kiq_ring);
+}
+
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ uint64_t queue_mask = 0;
+ int r, i;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
+ return -EINVAL;
+
+ for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+ if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+ continue;
+
+ /* This situation may be hit in the future if a new HW
+ * generation exposes more than 64 queues. If so, the
+ * definition of queue_mask needs updating */
+ if (WARN_ON(i > (sizeof(queue_mask)*8))) {
+ DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+ break;
+ }
+
+ queue_mask |= (1ull << i);
+ }
+
+ DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
+ kiq_ring->queue);
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_compute_rings +
+ kiq->pmf->set_resources_size);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
+
+ r = amdgpu_ring_test_helper(kiq_ring);
+ if (r)
+ DRM_ERROR("KCQ enable failed\n");
+
+ return r;
+}
+
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
@@ -393,7 +547,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return;
- if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
+ if (!is_support_sw_smu(adev) &&
+ (!adev->powerplay.pp_funcs ||
+ !adev->powerplay.pp_funcs->set_powergating_by_smu))
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 09fc53af3d35..f96407ba9770 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -38,6 +38,7 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
+#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
@@ -54,12 +55,41 @@ struct amdgpu_mec {
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
+enum amdgpu_unmap_queues_action {
+ PREEMPT_QUEUES = 0,
+ RESET_QUEUES,
+ DISABLE_PROCESS_QUEUES,
+ PREEMPT_QUEUES_NO_UNMAP,
+};
+
+struct kiq_pm4_funcs {
+ /* Support ASIC-specific kiq pm4 packets*/
+ void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring,
+ uint64_t queue_mask);
+ void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring);
+ void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+ void (*kiq_query_status)(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq);
+ /* Packet sizes */
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+};
+
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
+ const struct kiq_pm4_funcs *pmf;
};
/*
@@ -131,6 +161,10 @@ struct amdgpu_gfx_config {
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
+ /* gfx10 specific config */
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
};
struct amdgpu_cu_info {
@@ -191,10 +225,38 @@ struct sq_work {
unsigned ih_data;
};
+struct amdgpu_pfp {
+ struct amdgpu_bo *pfp_fw_obj;
+ uint64_t pfp_fw_gpu_addr;
+ uint32_t *pfp_fw_ptr;
+};
+
+struct amdgpu_ce {
+ struct amdgpu_bo *ce_fw_obj;
+ uint64_t ce_fw_gpu_addr;
+ uint32_t *ce_fw_ptr;
+};
+
+struct amdgpu_me {
+ struct amdgpu_bo *me_fw_obj;
+ uint64_t me_fw_gpu_addr;
+ uint32_t *me_fw_ptr;
+ uint32_t num_me;
+ uint32_t num_pipe_per_me;
+ uint32_t num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+};
+
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
+ struct amdgpu_pfp pfp;
+ struct amdgpu_ce ce;
+ struct amdgpu_me me;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
@@ -297,17 +359,27 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size);
-int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size);
-void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned mqd_size);
+void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
+int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
-int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
- int pipe, int queue);
-void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
- int *mec, int *pipe, int *queue);
+void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
+
+int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue);
+void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue);
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
+int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
+ int pipe, int queue);
+void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
+ int *me, int *pipe, int *queue);
+bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
+ int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 250d9212cc38..924d83e711ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -24,6 +24,8 @@
*
*/
+#include <linux/io-64-nonatomic-lo-hi.h>
+
#include "amdgpu.h"
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 62591d081856..627104401e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -22,7 +22,6 @@
* Authors: Christian König
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
struct amdgpu_gtt_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index f2739995c335..70dbe343f51d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -23,9 +23,10 @@
* Authors: Dave Airlie
* Alex Deucher
*/
+
#include <linux/export.h>
+#include <linux/pci.h>
-#include <drm/drmP.h>
#include <drm/drm_edid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index fe393a46f881..7850084a05e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -28,8 +28,10 @@
*/
#include <linux/seq_file.h>
#include <linux/slab.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+
#include "amdgpu.h"
#include "atom.h"
#include "amdgpu_trace.h"
@@ -209,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
skip_preamble = ring->current_ctx == fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
status |= job->preamble_status;
+ status |= job->preemption_status;
amdgpu_ring_emit_cntxcntl(ring, status);
}
@@ -217,9 +220,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* drop preamble IBs if we don't have a context switch */
if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
- skip_preamble &&
- !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
- !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
+ skip_preamble &&
+ !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
+ !amdgpu_mcbp &&
+ !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, job, ib, status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index df9b173c3d0b..57b3d8a9bef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -24,7 +24,7 @@
#include <linux/idr.h>
#include <linux/dma-fence-array.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -364,8 +364,11 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
- /* Concurrent flushes are only possible starting with Vega10 */
- if (adev->asic_type < CHIP_VEGA10 && needs_flush)
+ /* Concurrent flushes are only possible starting with Vega10 and
+ * are broken on Navi10 and Navi14.
+ */
+ if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
+ adev->asic_type == CHIP_NAVI10))
continue;
/* Good, we can use this VMID. Remember this submission as
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 934dfdcb4e73..6d8f05511aba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -21,7 +21,8 @@
*
*/
-#include <drm/drmP.h>
+#include <linux/dma-mapping.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
index 26482914dc4b..5cf142e849bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -29,8 +29,9 @@
*/
#include <linux/compat.h>
-#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
+#include <drm/drm_ioctl.h>
+
#include "amdgpu_drv.h"
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index af4c3b1af322..2a3f5ec298db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -43,8 +43,11 @@
*/
#include <linux/irq.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_vblank.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a17fb1af204..9d76e0923a5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -24,7 +24,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
+ else
+ drm_sched_suspend_timeout(&ring->sched);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index e1b46a6703de..51e62504c279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -29,6 +29,8 @@
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
/* bit set means context switch occured */
#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
+/* bit set means IB is preempted */
+#define AMDGPU_IB_PREEMPTED (1 << 3)
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
@@ -45,6 +47,7 @@ struct amdgpu_job {
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status;
+ uint32_t preemption_status;
uint32_t num_ibs;
void *owner;
bool vm_needs_flush;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b17d0545728e..0cf7e8606fd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -25,8 +25,9 @@
* Alex Deucher
* Jerome Glisse
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
+#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
@@ -35,13 +36,15 @@
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
-static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
int i;
@@ -102,7 +105,7 @@ done_free:
dev->dev_private = NULL;
}
-static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
@@ -590,13 +593,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
- gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
- gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
- gds_info.gds_total_size = adev->gds.mem.total_size;
- gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
- gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
- gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
- gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
+ gds_info.compute_partition_size = adev->gds.gds_size;
+ gds_info.gds_total_size = adev->gds.gds_size;
+ gds_info.gws_per_compute_partition = adev->gds.gws_size;
+ gds_info.oa_per_compute_partition = adev->gds.oa_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
@@ -712,7 +712,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.ids_flags = 0;
if (adev->flags & AMD_IS_APU)
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
@@ -765,6 +765,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
+ if (adev->family >= AMDGPU_FAMILY_NV)
+ dev_info.pa_sc_tile_steering_override =
+ adev->gfx.config.pa_sc_tile_steering_override;
+
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
}
@@ -977,7 +981,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
int r, pasid;
/* Ensure IB tests are run on ring */
- flush_delayed_work(&adev->late_init_work);
+ flush_delayed_work(&adev->delayed_init_work);
file_priv->driver_priv = NULL;
@@ -1006,7 +1010,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
@@ -1069,7 +1073,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
- if (amdgpu_sriov_vf(adev)) {
+ if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
amdgpu_vm_bo_rmv(adev, fpriv->csa_va);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
new file mode 100644
index 000000000000..78fe49033543
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_MES_H__
+#define __AMDGPU_MES_H__
+
+struct amdgpu_mes_funcs;
+
+struct amdgpu_mes {
+ struct amdgpu_adev *adev;
+
+ const struct firmware *fw;
+
+ /* mes ucode */
+ struct amdgpu_bo *ucode_fw_obj;
+ uint64_t ucode_fw_gpu_addr;
+ uint32_t *ucode_fw_ptr;
+ uint32_t ucode_fw_version;
+ uint64_t uc_start_addr;
+
+ /* mes ucode data */
+ struct amdgpu_bo *data_fw_obj;
+ uint64_t data_fw_gpu_addr;
+ uint32_t *data_fw_ptr;
+ uint32_t data_fw_version;
+ uint64_t data_start_addr;
+
+ /* ip specific functions */
+ struct amdgpu_mes_funcs *funcs;
+};
+
+struct mes_add_queue_input {
+ uint32_t process_id;
+ uint64_t page_table_base_addr;
+ uint64_t process_va_start;
+ uint64_t process_va_end;
+ uint64_t process_quantum;
+ uint64_t process_context_addr;
+ uint64_t gang_quantum;
+ uint64_t gang_context_addr;
+ uint32_t inprocess_gang_priority;
+ uint32_t gang_global_priority_level;
+ uint32_t doorbell_offset;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+ uint32_t queue_type;
+ uint32_t paging;
+};
+
+struct mes_remove_queue_input {
+ uint32_t doorbell_offset;
+ uint64_t gang_context_addr;
+};
+
+struct mes_suspend_gang_input {
+ bool suspend_all_gangs;
+ uint64_t gang_context_addr;
+ uint64_t suspend_fence_addr;
+ uint32_t suspend_fence_value;
+};
+
+struct mes_resume_gang_input {
+ bool resume_all_gangs;
+ uint64_t gang_context_addr;
+};
+
+struct amdgpu_mes_funcs {
+ int (*add_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input);
+
+ int (*remove_hw_queue)(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input);
+
+ int (*suspend_gang)(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input);
+
+ int (*resume_gang)(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input);
+};
+
+#endif /* __AMDGPU_MES_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 58ed401c5996..3971c201f320 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,51 +45,12 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
-#include <linux/interval_tree.h>
-#include <drm/drmP.h>
#include <drm/drm.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @mn: MMU notifier structure
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @read_lock: mutex for recursive locking of @lock
- * @recursion: depth of recursion
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- struct mmu_notifier mn;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
- struct mutex read_lock;
- atomic_t recursion;
-};
-
-/**
* struct amdgpu_mn_node
*
* @it: interval node defining start-last of the affected address range
@@ -103,7 +64,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +90,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -181,14 +140,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
if (blockable)
- mutex_lock(&amn->read_lock);
- else if (!mutex_trylock(&amn->read_lock))
+ down_read(&amn->lock);
+ else if (!down_read_trylock(&amn->lock))
return -EAGAIN;
- if (atomic_inc_return(&amn->recursion) == 1)
- down_read_non_owner(&amn->lock);
- mutex_unlock(&amn->read_lock);
-
return 0;
}
@@ -199,8 +154,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
*/
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
- if (atomic_dec_return(&amn->recursion) == 0)
- up_read_non_owner(&amn->lock);
+ up_read(&amn->lock);
}
/**
@@ -229,149 +183,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
true, false, MAX_SCHEDULE_TIMEOUT);
if (r <= 0)
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
- amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
}
}
/**
- * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
/* TODO we should be able to split locking for interval tree and
* amdgpu_mn_invalidate_node
*/
- if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
- if (!mmu_notifier_range_blockable(range)) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
- amdgpu_mn_invalidate_node(node, range->start, end);
+ amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
- * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
- if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
- if (!mmu_notifier_range_blockable(range)) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- range->start,
- end))
- amdgpu_amdkfd_evict_userptr(mem, range->mm);
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -401,12 +338,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
- mutex_init(&amn->read_lock);
- atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -432,7 +367,7 @@ free_amn:
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +423,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
@@ -528,3 +463,25 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mutex_unlock(&adev->mn_lock);
}
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+ if (range) {
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ INIT_LIST_HEAD(&range->list);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..b8ed68943625 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -24,23 +24,61 @@
#ifndef __AMDGPU_MN_H__
#define __AMDGPU_MN_H__
-/*
- * MMU Notifier
- */
-struct amdgpu_mn;
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/interval_tree.h>
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+/**
+ * struct amdgpu_mn
+ *
+ * @adev: amdgpu device pointer
+ * @mm: process address space
+ * @type: type of MMU notifier
+ * @work: destruction work item
+ * @node: hash table node to find structure by adev and mn
+ * @lock: rw semaphore protecting the notifier nodes
+ * @objects: interval tree containing amdgpu_mn_nodes
+ * @mirror: HMM mirror function support
+ *
+ * Data for each amdgpu device and process address space.
+ */
+struct amdgpu_mn {
+ /* constant after initialisation */
+ struct amdgpu_device *adev;
+ struct mm_struct *mm;
+ enum amdgpu_mn_type type;
+
+ /* only used on destruction */
+ struct work_struct work;
+
+ /* protected by adev->mn_lock */
+ struct hlist_node node;
+
+ /* objects protected by lock */
+ struct rw_semaphore lock;
+ struct rb_root_cached objects;
+
+#ifdef CONFIG_HMM_MIRROR
+ /* HMM mirror */
+ struct hmm_mirror mirror;
+#endif
+};
+
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
@@ -51,6 +89,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
+ DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+ "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
return -ENODEV;
}
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 2e9e3db778c6..eb9975f4decb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -331,8 +331,6 @@ struct amdgpu_mode_info {
struct drm_property *audio_property;
/* FMT dithering */
struct drm_property *dither_property;
- /* maximum number of bits per channel for monitor color */
- struct drm_property *max_bpc_property;
/* Adaptive Backlight Modulation (power feature) */
struct drm_property *abm_level_property;
/* hardcoded DFP edid from BIOS */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 93b2c5a48a71..bea6f298dfdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -31,7 +31,7 @@
*/
#include <linux/list.h>
#include <linux/slab.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
@@ -495,7 +495,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
#endif
bo->tbo.bdev = &adev->mman.bdev;
- amdgpu_bo_placement_from_domain(bo, bp->domain);
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
+ AMDGPU_GEM_DOMAIN_GDS))
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ else
+ amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
@@ -975,6 +979,7 @@ static const char *amdgpu_vram_names[] = {
"HBM",
"DDR3",
"DDR4",
+ "GDDR6",
};
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index c430e8259038..d60593cc436e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
+ r = __ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
dev_err(adev->dev, "%p reserve failed\n", bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index 8e67c1210d7c..1f2305b7bd13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index abeaab4bf1bc..193d53720d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -22,7 +22,9 @@
* Authors: Rafał Miłecki <zajec5@gmail.com>
* Alex Deucher <alexdeucher@gmail.com>
*/
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+
#include "amdgpu.h"
#include "amdgpu_drv.h"
#include "amdgpu_pm.h"
@@ -31,6 +33,7 @@
#include "amdgpu_smu.h"
#include "atom.h"
#include <linux/power_supply.h>
+#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/nospec.h>
@@ -64,9 +67,21 @@ static const struct cg_flag_name clocks[] = {
{AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"},
{AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"},
{AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"},
+
+ {AMD_CG_SUPPORT_ATHUB_MGCG, "Address Translation Hub Medium Grain Clock Gating"},
+ {AMD_CG_SUPPORT_ATHUB_LS, "Address Translation Hub Light Sleep"},
{0, NULL},
};
+static const struct hwmon_temp_label {
+ enum PP_HWMON_TEMP channel;
+ const char *label;
+} temp_label[] = {
+ {PP_TEMP_EDGE, "edge"},
+ {PP_TEMP_JUNCTION, "junction"},
+ {PP_TEMP_MEM, "mem"},
+};
+
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
{
if (adev->pm.dpm_enabled) {
@@ -260,8 +275,11 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
enum amd_dpm_forced_level level = 0xff;
- if ((adev->flags & AMD_IS_PX) &&
- (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return snprintf(buf, PAGE_SIZE, "off\n");
if (is_support_sw_smu(adev))
@@ -299,10 +317,12 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- if (is_support_sw_smu(adev))
- current_level = smu_get_performance_level(&adev->smu);
- else if (adev->powerplay.pp_funcs->get_performance_level)
- current_level = amdgpu_dpm_get_performance_level(adev);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (is_support_sw_smu(adev))
+ current_level = smu_get_performance_level(&adev->smu);
+ else if (adev->powerplay.pp_funcs->get_performance_level)
+ current_level = amdgpu_dpm_get_performance_level(adev);
+ }
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMD_DPM_FORCED_LEVEL_LOW;
@@ -353,18 +373,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
}
if (is_support_sw_smu(adev)) {
- mutex_lock(&adev->pm.mutex);
- if (adev->pm.dpm.thermal_active) {
- count = -EINVAL;
- mutex_unlock(&adev->pm.mutex);
- goto fail;
- }
ret = smu_force_performance_level(&adev->smu, level);
if (ret)
count = -EINVAL;
- else
- adev->pm.dpm.forced_level = level;
- mutex_unlock(&adev->pm.mutex);
} else if (adev->powerplay.pp_funcs->force_performance_level) {
mutex_lock(&adev->pm.mutex);
if (adev->pm.dpm.thermal_active) {
@@ -678,12 +689,12 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
if (ret)
return -EINVAL;
} else {
- if (adev->powerplay.pp_funcs->odn_edit_dpm_table)
+ if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
parameter, parameter_size);
-
- if (ret)
- return -EINVAL;
+ if (ret)
+ return -EINVAL;
+ }
if (type == PP_OD_COMMIT_DPM_TABLE) {
if (adev->powerplay.pp_funcs->dispatch_tasks) {
@@ -709,10 +720,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
uint32_t size = 0;
if (is_support_sw_smu(adev)) {
- size = smu_print_clk_levels(&adev->smu, OD_SCLK, buf);
- size += smu_print_clk_levels(&adev->smu, OD_MCLK, buf+size);
- size += smu_print_clk_levels(&adev->smu, OD_VDDC_CURVE, buf+size);
- size += smu_print_clk_levels(&adev->smu, OD_RANGE, buf+size);
+ size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf);
+ size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size);
+ size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size);
+ size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size);
return size;
} else if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
@@ -758,7 +769,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
pr_debug("featuremask = 0x%llx\n", featuremask);
- if (adev->powerplay.pp_funcs->set_ppfeature_status) {
+ if (is_support_sw_smu(adev)) {
+ ret = smu_set_ppfeature_status(&adev->smu, featuremask);
+ if (ret)
+ return -EINVAL;
+ } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
if (ret)
return -EINVAL;
@@ -774,7 +789,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- if (adev->powerplay.pp_funcs->get_ppfeature_status)
+ if (is_support_sw_smu(adev)) {
+ return smu_get_ppfeature_status(&adev->smu, buf);
+ } else if (adev->powerplay.pp_funcs->get_ppfeature_status)
return amdgpu_dpm_get_ppfeature_status(adev, buf);
return snprintf(buf, PAGE_SIZE, "\n");
@@ -817,7 +834,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf);
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_SCLK, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
else
@@ -870,12 +887,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_SCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
@@ -892,8 +912,12 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) &&
+ adev->virt.ops->get_pp_clk)
+ return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf);
+
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_MCLK, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
else
@@ -910,12 +934,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
int ret;
uint32_t mask = 0;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_MCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -933,7 +960,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_SOCCLK, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
else
@@ -955,7 +982,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_SOCCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
@@ -973,7 +1000,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_FCLK, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
else
@@ -995,7 +1022,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_FCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
@@ -1013,7 +1040,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_DCEFCLK, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
else
@@ -1035,7 +1062,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_DCEFCLK, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
@@ -1053,7 +1080,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
struct amdgpu_device *adev = ddev->dev_private;
if (is_support_sw_smu(adev))
- return smu_print_clk_levels(&adev->smu, PP_PCIE, buf);
+ return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
else if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
else
@@ -1075,7 +1102,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
return ret;
if (is_support_sw_smu(adev))
- ret = smu_force_clk_levels(&adev->smu, PP_PCIE, mask);
+ ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
else if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -1094,7 +1121,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
uint32_t value = 0;
if (is_support_sw_smu(adev))
- value = smu_get_od_percentage(&(adev->smu), OD_SCLK);
+ value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
else if (adev->powerplay.pp_funcs->get_sclk_od)
value = amdgpu_dpm_get_sclk_od(adev);
@@ -1119,7 +1146,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
}
if (is_support_sw_smu(adev)) {
- value = smu_set_od_percentage(&(adev->smu), OD_SCLK, (uint32_t)value);
+ value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
} else {
if (adev->powerplay.pp_funcs->set_sclk_od)
amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
@@ -1145,7 +1172,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
uint32_t value = 0;
if (is_support_sw_smu(adev))
- value = smu_get_od_percentage(&(adev->smu), OD_MCLK);
+ value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
else if (adev->powerplay.pp_funcs->get_mclk_od)
value = amdgpu_dpm_get_mclk_od(adev);
@@ -1170,7 +1197,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
}
if (is_support_sw_smu(adev)) {
- value = smu_set_od_percentage(&(adev->smu), OD_MCLK, (uint32_t)value);
+ value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
} else {
if (adev->powerplay.pp_funcs->set_mclk_od)
amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
@@ -1303,6 +1330,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
}
/**
+ * DOC: mem_busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the VRAM
+ * is as a percentage. The file mem_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
+static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int r, value, size = sizeof(value);
+
+ /* read the IP busy sensor */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
+ (void *)&value, &size);
+
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+/**
* DOC: pcie_bw
*
* The amdgpu driver provides a sysfs API for estimating how much data
@@ -1327,6 +1380,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
count0, count1, pcie_get_mps(adev->pdev));
}
+/**
+ * DOC: unique_id
+ *
+ * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
+ * The file unique_id is used for this.
+ * This will provide a Unique ID that will persist from machine to machine
+ *
+ * NOTE: This will only work for GFX9 and newer. This file will be absent
+ * on unsupported ASICs (GFX8 and older)
+ */
+static ssize_t amdgpu_get_unique_id(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ if (adev->unique_id)
+ return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
+
+ return 0;
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -1371,10 +1447,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
+static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
+ amdgpu_get_memory_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
amdgpu_get_ppfeature_status,
amdgpu_set_ppfeature_status);
+static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1382,18 +1461,40 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
- int r, temp, size = sizeof(temp);
+ int channel = to_sensor_dev_attr(attr)->index;
+ int r, temp = 0, size = sizeof(temp);
/* Can't get temperature when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- /* get the temperature */
- r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
- (void *)&temp, &size);
- if (r)
- return r;
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ /* get current junction temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_EDGE:
+ /* get current edge temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ case PP_TEMP_MEM:
+ /* get current memory temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
+ break;
+ }
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -1414,6 +1515,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
+static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_hotspot_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int hyst = to_sensor_dev_attr(attr)->index;
+ int temp;
+
+ if (hyst)
+ temp = adev->pm.dpm.thermal.min_mem_temp;
+ else
+ temp = adev->pm.dpm.thermal.max_mem_crit_temp;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int channel = to_sensor_dev_attr(attr)->index;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int channel = to_sensor_dev_attr(attr)->index;
+ int temp = 0;
+
+ if (channel >= PP_TEMP_MAX)
+ return -EINVAL;
+
+ switch (channel) {
+ case PP_TEMP_JUNCTION:
+ temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+ break;
+ case PP_TEMP_EDGE:
+ temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+ break;
+ case PP_TEMP_MEM:
+ temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+ break;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1983,11 +2154,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ * - temp2_input and temp3_input are supported on SOC15 dGPUs only
*
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_label: temperature channel label
+ * - temp2_label and temp3_label are supported on SOC15 dGPUs only
*
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ * - these are supported on SOC15 dGPUs only
*
* hwmon interfaces for GPU voltage:
*
@@ -2035,9 +2215,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2064,6 +2256,18 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit.dev_attr.attr,
+ &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit.dev_attr.attr,
+ &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
+ &sensor_dev_attr_temp1_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp2_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp3_emergency.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &sensor_dev_attr_temp2_label.dev_attr.attr,
+ &sensor_dev_attr_temp3_label.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2186,6 +2390,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
+ /* only SOC15 dGPUs support hotspot and mem temperatures */
+ if (((adev->flags & AMD_IS_APU) ||
+ adev->asic_type < CHIP_VEGA10) &&
+ (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+ return 0;
+
return effective_mode;
}
@@ -2490,6 +2710,44 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
}
+int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
+ return ret;
+
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+ return ret;
+ }
+
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+ return ret;
+ }
+
+ ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
+ if (ret) {
+ DRM_ERROR("failed to create device file for dpm state\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev)
+{
+ if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)))
+ return;
+
+ device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+}
+
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
{
int r;
@@ -2627,6 +2885,17 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
+ /* APU does not have its own dedicated memory */
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->asic_type != CHIP_VEGA10)) {
+ ret = device_create_file(adev->dev,
+ &dev_attr_mem_busy_percent);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "mem_busy_percent\n");
+ return ret;
+ }
+ }
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@@ -2635,6 +2904,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
}
+ if (adev->unique_id)
+ ret = device_create_file(adev->dev, &dev_attr_unique_id);
+ if (ret) {
+ DRM_ERROR("failed to create device file unique_id\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2692,8 +2967,13 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->asic_type != CHIP_VEGA10))
+ device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
+ if (adev->unique_id)
+ device_remove_file(adev->dev, &dev_attr_unique_id);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2790,6 +3070,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
/* GPU Load */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
seq_printf(m, "GPU Load: %u %%\n", value);
+ /* MEM Load */
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
+ seq_printf(m, "MEM Load: %u %%\n", value);
+
seq_printf(m, "\n");
/* SMC feature mask */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
index 7ff0e7621fff..ef31448ee8d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h
@@ -32,7 +32,9 @@ struct cg_flag_name
void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev);
int amdgpu_pm_sysfs_init(struct amdgpu_device *adev);
+int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev);
void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev);
+void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev);
void amdgpu_pm_print_power_states(struct amdgpu_device *adev);
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
new file mode 100644
index 000000000000..0e6dba9f60f0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Jonathan Kim <jonathan.kim@amd.com>
+ *
+ */
+
+#include <linux/perf_event.h>
+#include <linux/init.h>
+#include "amdgpu.h"
+#include "amdgpu_pmu.h"
+#include "df_v3_6.h"
+
+#define PMU_NAME_SIZE 32
+
+/* record to keep track of pmu entry per pmu type per device */
+struct amdgpu_pmu_entry {
+ struct list_head entry;
+ struct amdgpu_device *adev;
+ struct pmu pmu;
+ unsigned int pmu_perf_type;
+};
+
+static LIST_HEAD(amdgpu_pmu_list);
+
+
+/* initialize perf counter */
+static int amdgpu_perf_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* update the hw_perf_event struct with config data */
+ hwc->conf = event->attr.config;
+
+ return 0;
+}
+
+/* start perf counter */
+static void amdgpu_perf_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ switch (pe->pmu_perf_type) {
+ case PERF_TYPE_AMDGPU_DF:
+ if (!(flags & PERF_EF_RELOAD))
+ pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+
+ pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0);
+ break;
+ default:
+ break;
+ }
+
+ perf_event_update_userpage(event);
+
+}
+
+/* read perf counter */
+static void amdgpu_perf_read(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ u64 count, prev;
+
+ do {
+ prev = local64_read(&hwc->prev_count);
+
+ switch (pe->pmu_perf_type) {
+ case PERF_TYPE_AMDGPU_DF:
+ pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf,
+ &count);
+ break;
+ default:
+ count = 0;
+ break;
+ };
+ } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev);
+
+ local64_add(count - prev, &event->count);
+}
+
+/* stop perf counter */
+static void amdgpu_perf_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ switch (pe->pmu_perf_type) {
+ case PERF_TYPE_AMDGPU_DF:
+ pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0);
+ break;
+ default:
+ break;
+ };
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ amdgpu_perf_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+/* add perf counter */
+static int amdgpu_perf_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int retval;
+
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ switch (pe->pmu_perf_type) {
+ case PERF_TYPE_AMDGPU_DF:
+ retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1);
+ break;
+ default:
+ return 0;
+ };
+
+ if (retval)
+ return retval;
+
+ if (flags & PERF_EF_START)
+ amdgpu_perf_start(event, PERF_EF_RELOAD);
+
+ return retval;
+
+}
+
+/* delete perf counter */
+static void amdgpu_perf_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct amdgpu_pmu_entry *pe = container_of(event->pmu,
+ struct amdgpu_pmu_entry,
+ pmu);
+
+ amdgpu_perf_stop(event, PERF_EF_UPDATE);
+
+ switch (pe->pmu_perf_type) {
+ case PERF_TYPE_AMDGPU_DF:
+ pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1);
+ break;
+ default:
+ break;
+ };
+
+ perf_event_update_userpage(event);
+}
+
+/* vega20 pmus */
+
+/* init pmu tracking per pmu type */
+static int init_pmu_by_type(struct amdgpu_device *adev,
+ const struct attribute_group *attr_groups[],
+ char *pmu_type_name, char *pmu_file_prefix,
+ unsigned int pmu_perf_type,
+ unsigned int num_counters)
+{
+ char pmu_name[PMU_NAME_SIZE];
+ struct amdgpu_pmu_entry *pmu_entry;
+ int ret = 0;
+
+ pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL);
+
+ if (!pmu_entry)
+ return -ENOMEM;
+
+ pmu_entry->adev = adev;
+ pmu_entry->pmu = (struct pmu){
+ .event_init = amdgpu_perf_event_init,
+ .add = amdgpu_perf_add,
+ .del = amdgpu_perf_del,
+ .start = amdgpu_perf_start,
+ .stop = amdgpu_perf_stop,
+ .read = amdgpu_perf_read,
+ .task_ctx_nr = perf_invalid_context,
+ };
+
+ pmu_entry->pmu.attr_groups = attr_groups;
+ pmu_entry->pmu_perf_type = pmu_perf_type;
+ snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d",
+ pmu_file_prefix, adev->ddev->primary->index);
+
+ ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1);
+
+ if (ret) {
+ kfree(pmu_entry);
+ pr_warn("Error initializing AMDGPU %s PMUs.\n", pmu_type_name);
+ return ret;
+ }
+
+ pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n",
+ pmu_type_name, num_counters);
+
+ list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
+
+ return 0;
+}
+
+/* init amdgpu_pmu */
+int amdgpu_pmu_init(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ /* init df */
+ ret = init_pmu_by_type(adev, df_v3_6_attr_groups,
+ "DF", "amdgpu_df", PERF_TYPE_AMDGPU_DF,
+ DF_V3_6_MAX_COUNTERS);
+
+ /* other pmu types go here*/
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+
+/* destroy all pmu data associated with target device */
+void amdgpu_pmu_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_pmu_entry *pe, *temp;
+
+ list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
+ if (pe->adev == adev) {
+ list_del(&pe->entry);
+ perf_pmu_unregister(&pe->pmu);
+ kfree(pe);
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
new file mode 100644
index 000000000000..7dddb7160a11
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Jonathan Kim <jonathan.kim@amd.com>
+ *
+ */
+
+#ifndef _AMDGPU_PMU_H_
+#define _AMDGPU_PMU_H_
+
+enum amdgpu_pmu_perf_type {
+ PERF_TYPE_AMDGPU_DF = 0,
+ PERF_TYPE_AMDGPU_MAX
+};
+
+int amdgpu_pmu_init(struct amdgpu_device *adev);
+void amdgpu_pmu_fini(struct amdgpu_device *adev);
+
+#endif /* _AMDGPU_PMU_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 86cc24b2e0aa..e69ad6e089c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -24,7 +24,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
@@ -46,12 +46,19 @@ static int psp_early_init(void *handle)
case CHIP_VEGA10:
case CHIP_VEGA12:
psp_v3_1_set_psp_funcs(psp);
+ psp->autoload_supported = false;
break;
case CHIP_RAVEN:
psp_v10_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
break;
case CHIP_VEGA20:
psp_v11_0_set_psp_funcs(psp);
+ psp->autoload_supported = false;
+ break;
+ case CHIP_NAVI10:
+ psp_v11_0_set_psp_funcs(psp);
+ psp->autoload_supported = true;
break;
default:
return -EINVAL;
@@ -182,10 +189,44 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
cmd->cmd.cmd_setup_tmr.buf_size = size;
}
+static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint64_t pri_buf_mc, uint32_t size)
+{
+ cmd->cmd_id = GFX_CMD_ID_LOAD_TOC;
+ cmd->cmd.cmd_load_toc.toc_phy_addr_lo = lower_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_phy_addr_hi = upper_32_bits(pri_buf_mc);
+ cmd->cmd.cmd_load_toc.toc_size = size;
+}
+
+/* Issue LOAD TOC cmd to PSP to part toc and calculate tmr size needed */
+static int psp_load_toc(struct psp_context *psp,
+ uint32_t *tmr_size)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+ /* Copy toc to psp firmware private buffer */
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+ memcpy(psp->fw_pri_buf, psp->toc_start_addr, psp->toc_bin_size);
+
+ psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ if (!ret)
+ *tmr_size = psp->cmd_buf_mem->resp.tmr_size;
+ kfree(cmd);
+ return ret;
+}
+
/* Set up Trusted Memory Region */
static int psp_tmr_init(struct psp_context *psp)
{
int ret;
+ int tmr_size;
/*
* According to HW engineer, they prefer the TMR address be "naturally
@@ -194,7 +235,21 @@ static int psp_tmr_init(struct psp_context *psp)
* Note: this memory need be reserved till the driver
* uninitializes.
*/
- ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, PSP_TMR_SIZE,
+ tmr_size = PSP_TMR_SIZE;
+
+ /* For ASICs support RLC autoload, psp will parse the toc
+ * and calculate the total size of TMR needed */
+ if (psp->toc_start_addr &&
+ psp->toc_bin_size &&
+ psp->fw_pri_buf) {
+ ret = psp_load_toc(psp, &tmr_size);
+ if (ret) {
+ DRM_ERROR("Failed to load toc\n");
+ return ret;
+ }
+ }
+
+ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -210,9 +265,10 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
- psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
- DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
- PSP_TMR_SIZE, psp->tmr_mc_addr);
+ psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr,
+ amdgpu_bo_size(psp->tmr_bo));
+ DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
+ amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
@@ -289,6 +345,34 @@ static int psp_asd_load(struct psp_context *psp)
return ret;
}
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t id, uint32_t value)
+{
+ cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+ cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+ cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value)
+{
+ struct psp_gfx_cmd_resp *cmd = NULL;
+ int ret = 0;
+
+ if (reg >= PSP_REG_LAST)
+ return -EINVAL;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+ ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+ kfree(cmd);
+ return ret;
+}
+
static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
uint32_t xgmi_ta_size, uint32_t shared_size)
@@ -698,12 +782,24 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
+ ret = psp_tmr_init(psp);
+ if (ret) {
+ DRM_ERROR("PSP tmr init failed!\n");
+ return ret;
+ }
+
ret = psp_tmr_load(psp);
if (ret) {
DRM_ERROR("PSP load tmr failed!\n");
return ret;
}
+ ret = psp_asd_init(psp);
+ if (ret) {
+ DRM_ERROR("PSP asd init failed!\n");
+ return ret;
+ }
+
ret = psp_asd_load(psp);
if (ret) {
DRM_ERROR("PSP load asd failed!\n");
@@ -795,6 +891,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_DMCU_INTV:
*type = GFX_FW_TYPE_DMCU_ISR;
break;
+ case AMDGPU_UCODE_ID_VCN0_RAM:
+ *type = GFX_FW_TYPE_VCN0_RAM;
+ break;
+ case AMDGPU_UCODE_ID_VCN1_RAM:
+ *type = GFX_FW_TYPE_VCN1_RAM;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -823,19 +925,45 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
return ret;
}
+static int psp_execute_np_fw_load(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode)
+{
+ int ret = 0;
+
+ ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd);
+ if (ret)
+ return ret;
+
+ ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
+ psp->fence_buf_mc_addr);
+
+ return ret;
+}
+
static int psp_np_fw_load(struct psp_context *psp)
{
int i, ret;
struct amdgpu_firmware_info *ucode;
struct amdgpu_device* adev = psp->adev;
+ if (psp->autoload_supported) {
+ ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
+ if (!ucode->fw)
+ goto out;
+
+ ret = psp_execute_np_fw_load(psp, ucode);
+ if (ret)
+ return ret;
+ }
+
+out:
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (!ucode->fw)
continue;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
- psp_smu_reload_quirk(psp))
+ (psp_smu_reload_quirk(psp) || psp->autoload_supported))
continue;
if (amdgpu_sriov_vf(adev) &&
(ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0
@@ -843,16 +971,24 @@ static int psp_np_fw_load(struct psp_context *psp)
|| ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))
/*skip ucode loading in SRIOV VF */
continue;
+ if (psp->autoload_supported &&
+ (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||
+ ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))
+ /* skip mec JT when autoload is enabled */
+ continue;
- ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd);
- if (ret)
- return ret;
-
- ret = psp_cmd_submit_buf(psp, ucode, psp->cmd,
- psp->fence_buf_mc_addr);
+ ret = psp_execute_np_fw_load(psp, ucode);
if (ret)
return ret;
+ /* Start rlc autoload after psp recieved all the gfx firmware */
+ if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+ ret = psp_rlc_autoload(psp);
+ if (ret) {
+ DRM_ERROR("Failed to start rlc autoload\n");
+ return ret;
+ }
+ }
#if 0
/* check if firmware loaded sucessfully */
if (!amdgpu_psp_check_fw_loading_status(adev, i))
@@ -911,18 +1047,6 @@ static int psp_load_fw(struct amdgpu_device *adev)
goto failed;
}
- ret = psp_tmr_init(psp);
- if (ret) {
- DRM_ERROR("PSP tmr init failed!\n");
- goto failed;
- }
-
- ret = psp_asd_init(psp);
- if (ret) {
- DRM_ERROR("PSP asd init failed!\n");
- goto failed;
- }
-
skip_memalloc:
ret = psp_hw_start(psp);
if (ret)
@@ -1070,6 +1194,39 @@ int psp_gpu_reset(struct amdgpu_device *adev)
return psp_mode1_reset(&adev->psp);
}
+int psp_rlc_autoload_start(struct psp_context *psp)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd;
+
+ if (amdgpu_sriov_vf(psp->adev))
+ return 0;
+
+ cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC;
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+ kfree(cmd);
+ return ret;
+}
+
+int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
+ uint64_t cmd_gpu_addr, int cmd_size)
+{
+ struct amdgpu_firmware_info ucode = {0};
+
+ ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM :
+ AMDGPU_UCODE_ID_VCN0_RAM;
+ ucode.mc_addr = cmd_gpu_addr;
+ ucode.ucode_size = cmd_size;
+
+ return psp_execute_np_fw_load(&adev->psp, &ucode);
+}
+
static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
enum AMDGPU_UCODE_ID ucode_type)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f07c96..6039acc84346 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
uint32_t ring_size;
};
+/* More registers may will be supported */
+enum psp_reg_prog_id {
+ PSP_REG_IH_RB_CNTL = 0, /* register IH_RB_CNTL */
+ PSP_REG_IH_RB_CNTL_RING1 = 1, /* register IH_RB_CNTL_RING1 */
+ PSP_REG_IH_RB_CNTL_RING2 = 2, /* register IH_RB_CNTL_RING2 */
+ PSP_REG_LAST
+};
+
struct psp_funcs
{
int (*init_microcode)(struct psp_context *psp);
@@ -93,6 +101,20 @@ struct psp_funcs
int (*ras_trigger_error)(struct psp_context *psp,
struct ta_ras_trigger_error_input *info);
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
+ int (*rlc_autoload_start)(struct psp_context *psp);
+};
+
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+ uint32_t num_nodes;
+ struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
struct psp_xgmi_context {
@@ -101,6 +123,7 @@ struct psp_xgmi_context {
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
+ struct psp_xgmi_topology_info top_info;
};
struct psp_ras_context {
@@ -132,8 +155,10 @@ struct psp_context
uint32_t sos_feature_version;
uint32_t sys_bin_size;
uint32_t sos_bin_size;
+ uint32_t toc_bin_size;
uint8_t *sys_start_addr;
uint8_t *sos_start_addr;
+ uint8_t *toc_start_addr;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -162,6 +187,8 @@ struct psp_context
/* fence value associated with cmd buffer */
atomic_t fence_value;
+ /* flag to mark whether gfx fw autoload is supported or not */
+ bool autoload_supported;
/* xgmi ta firmware and buffer */
const struct firmware *ta_fw;
@@ -181,18 +208,6 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
-struct psp_xgmi_node_info {
- uint64_t node_id;
- uint8_t num_hops;
- uint8_t is_sharing_enabled;
- enum ta_xgmi_assigned_sdma_engine sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
- uint32_t num_nodes;
- struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -224,6 +239,8 @@ struct psp_xgmi_topology_info {
#define psp_xgmi_set_topology_info(psp, num_device, topology) \
((psp)->funcs->xgmi_set_topology_info ? \
(psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
+#define psp_rlc_autoload(psp) \
+ ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0)
#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
@@ -243,12 +260,18 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
+int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
+ uint64_t cmd_gpu_addr, int cmd_size);
+
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
-extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
+int psp_rlc_autoload_start(struct psp_context *psp);
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+ uint32_t value);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..1a4412e47810 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -24,6 +24,8 @@
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
+
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
@@ -90,6 +92,12 @@ struct ras_manager {
struct ras_err_data err_data;
};
+struct ras_badpage {
+ unsigned int bp;
+ unsigned int size;
+ unsigned int flags;
+};
+
const char *ras_error_string[] = {
"none",
"parity",
@@ -118,9 +126,16 @@ const char *ras_block_string[] = {
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define ras_block_str(i) (ras_block_string[i])
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
+static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
+ uint64_t offset, uint64_t size,
+ struct amdgpu_bo **bo_ptr);
+static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
+ struct amdgpu_bo **bo_ptr);
+
static void amdgpu_ras_self_test(struct amdgpu_device *adev)
{
/* TODO */
@@ -237,8 +252,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
return 0;
}
-/*
- * DOC: ras debugfs control interface
+/**
+ * DOC: AMDGPU RAS debugfs control interface
*
* It accepts struct ras_debug_if who has two members.
*
@@ -300,6 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
struct ras_debug_if data;
+ struct amdgpu_bo *bo;
int ret = 0;
ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
@@ -317,7 +333,17 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
break;
case 2:
+ ret = amdgpu_ras_reserve_vram(adev,
+ data.inject.address, PAGE_SIZE, &bo);
+ if (ret) {
+ /* address was offset, now it is absolute.*/
+ data.inject.address += adev->gmc.vram_start;
+ if (data.inject.address > adev->gmc.vram_end)
+ break;
+ } else
+ data.inject.address = amdgpu_bo_gpu_offset(bo);
ret = amdgpu_ras_error_inject(adev, &data.inject);
+ amdgpu_ras_release_vram(adev, &bo);
break;
default:
ret = -EINVAL;
@@ -521,6 +547,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
enable ? "enable":"disable",
ras_block_str(head->block),
ret);
+ if (ret == TA_RAS_STATUS__RESET_NEEDED)
+ return -EAGAIN;
return -EINVAL;
}
@@ -541,16 +569,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
return -EINVAL;
if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
- /* If ras is enabled by vbios, we set up ras object first in
- * both case. For enable, that is all what we need do. For
- * disable, we need perform a ras TA disable cmd after that.
- */
- ret = __amdgpu_ras_feature_enable(adev, head, 1);
- if (ret)
- return ret;
+ if (enable) {
+ /* There is no harm to issue a ras TA cmd regardless of
+ * the currecnt ras state.
+ * If current state == target state, it will do nothing
+ * But sometimes it requests driver to reset and repost
+ * with error code -EAGAIN.
+ */
+ ret = amdgpu_ras_feature_enable(adev, head, 1);
+ /* With old ras TA, we might fail to enable ras.
+ * Log it and just setup the object.
+ * TODO need remove this WA in the future.
+ */
+ if (ret == -EINVAL) {
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (!ret)
+ DRM_INFO("RAS INFO: %s setup object\n",
+ ras_block_str(head->block));
+ }
+ } else {
+ /* setup the object then issue a ras TA disable cmd.*/
+ ret = __amdgpu_ras_feature_enable(adev, head, 1);
+ if (ret)
+ return ret;
- if (!enable)
ret = amdgpu_ras_feature_enable(adev, head, 0);
+ }
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -691,6 +735,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
/* sysfs begin */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+ switch (flags) {
+ case 0:
+ return "R";
+ case 1:
+ return "P";
+ case 2:
+ default:
+ return "F";
+ };
+}
+
+/*
+ * DOC: ras sysfs gpu_vram_bad_pages interface
+ *
+ * It allows user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of below character,
+ * R: reserved, this gpu page is reserved and not able to use.
+ * P: pending for reserve, this gpu page is marked as bad, will be reserved
+ * in next window of page_reserve.
+ * F: unable to reserve. this gpu page can't be reserved due to some reasons.
+ *
+ * examples:
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+ struct kobject *kobj, struct bin_attribute *attr,
+ char *buf, loff_t ppos, size_t count)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, badpages_attr);
+ struct amdgpu_device *adev = con->adev;
+ const unsigned int element_size =
+ sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+ unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+ unsigned int end = div64_ul(ppos + count - 1, element_size);
+ ssize_t s = 0;
+ struct ras_badpage *bps = NULL;
+ unsigned int bps_count = 0;
+
+ memset(buf, 0, count);
+
+ if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+ return 0;
+
+ for (; start < end && start < bps_count; start++)
+ s += scnprintf(&buf[s], element_size + 1,
+ "0x%08x : 0x%08x : %1s\n",
+ bps[start].bp,
+ bps[start].size,
+ amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+ kfree(bps);
+
+ return s;
+}
+
static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -731,9 +846,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+ struct bin_attribute *bin_attrs[] = {
+ &con->badpages_attr,
+ NULL
+ };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+ .bin_attrs = bin_attrs,
};
con->features_attr = (struct device_attribute) {
@@ -743,7 +863,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
},
.show = amdgpu_ras_sysfs_features_read,
};
+
+ con->badpages_attr = (struct bin_attribute) {
+ .attr = {
+ .name = "gpu_vram_bad_pages",
+ .mode = S_IRUGO,
+ },
+ .size = 0,
+ .private = NULL,
+ .read = amdgpu_ras_sysfs_badpages_read,
+ };
+
sysfs_attr_init(attrs[0]);
+ sysfs_bin_attr_init(bin_attrs[0]);
return sysfs_create_group(&adev->dev->kobj, &group);
}
@@ -755,9 +887,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
&con->features_attr.attr,
NULL
};
+ struct bin_attribute *bin_attrs[] = {
+ &con->badpages_attr,
+ NULL
+ };
struct attribute_group group = {
.name = "ras",
.attrs = attrs,
+ .bin_attrs = bin_attrs,
};
sysfs_remove_group(&adev->dev->kobj, &group);
@@ -833,40 +970,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
/* sysfs end */
/* debugfs begin */
-static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct drm_minor *minor = adev->ddev->primary;
- struct dentry *root = minor->debugfs_root, *dir;
- struct dentry *ent;
- dir = debugfs_create_dir("ras", root);
- if (IS_ERR(dir))
- return -EINVAL;
-
- con->dir = dir;
-
- ent = debugfs_create_file("ras_ctrl",
- S_IWUGO | S_IRUGO, con->dir,
- adev, &amdgpu_ras_debugfs_ctrl_ops);
- if (IS_ERR(ent)) {
- debugfs_remove(con->dir);
- return -EINVAL;
- }
-
- con->ent = ent;
- return 0;
+ con->dir = debugfs_create_dir("ras", minor->debugfs_root);
+ con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_ctrl_ops);
}
-int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
struct ras_fs_if *head)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
- struct dentry *ent;
if (!obj || obj->ent)
- return -EINVAL;
+ return;
get_obj(obj);
@@ -874,34 +995,25 @@ int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
head->debugfs_name,
sizeof(obj->fs_data.debugfs_name));
- ent = debugfs_create_file(obj->fs_data.debugfs_name,
- S_IWUGO | S_IRUGO, con->dir,
- obj, &amdgpu_ras_debugfs_ops);
-
- if (IS_ERR(ent))
- return -EINVAL;
-
- obj->ent = ent;
-
- return 0;
+ obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
+ S_IWUGO | S_IRUGO, con->dir, obj,
+ &amdgpu_ras_debugfs_ops);
}
-int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
struct ras_common_if *head)
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
if (!obj || !obj->ent)
- return 0;
+ return;
debugfs_remove(obj->ent);
obj->ent = NULL;
put_obj(obj);
-
- return 0;
}
-static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
+static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
@@ -914,8 +1026,6 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
debugfs_remove(con->dir);
con->dir = NULL;
con->ent = NULL;
-
- return 0;
}
/* debugfs end */
@@ -1089,6 +1199,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
/* ih end */
/* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+ struct ras_badpage **bps, unsigned int *count)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_err_handler_data *data;
+ int i = 0;
+ int ret = 0;
+
+ if (!con || !con->eh_data || !bps || !count)
+ return -EINVAL;
+
+ mutex_lock(&con->recovery_lock);
+ data = con->eh_data;
+ if (!data || data->count == 0) {
+ *bps = NULL;
+ goto out;
+ }
+
+ *bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+ if (!*bps) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (; i < data->count; i++) {
+ (*bps)[i] = (struct ras_badpage){
+ .bp = data->bps[i].bp,
+ .size = AMDGPU_GPU_PAGE_SIZE,
+ .flags = 0,
+ };
+
+ if (data->last_reserved <= i)
+ (*bps)[i].flags = 1;
+ else if (data->bps[i].bo == NULL)
+ (*bps)[i].flags = 2;
+ }
+
+ *count = data->count;
+out:
+ mutex_unlock(&con->recovery_lock);
+ return ret;
+}
+
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
@@ -1340,6 +1497,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
}
/* recovery end */
+/* return 0 if ras will reset gpu and repost.*/
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+ unsigned int block)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (!ras)
+ return -EINVAL;
+
+ ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+ return 0;
+}
+
/*
* check hardware's ras ability which will be saved in hw_supported.
* if hardware does not support ras, we can skip some ras initializtion and
@@ -1415,8 +1585,10 @@ recovery_out:
return -EINVAL;
}
-/* do some init work after IP late init as dependence */
-void amdgpu_ras_post_init(struct amdgpu_device *adev)
+/* do some init work after IP late init as dependence.
+ * and it runs in resume/gpu reset/booting up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
@@ -1444,6 +1616,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
}
}
}
+
+ if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
+ con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+ /* setup ras obj state as disabled.
+ * for init_by_vbios case.
+ * if we want to enable ras, just enable it in a normal way.
+ * If we want do disable it, need setup ras obj as enabled,
+ * then issue another TA disable cmd.
+ * See feature_enable_on_boot
+ */
+ amdgpu_ras_disable_all_features(adev, 1);
+ amdgpu_ras_reset_gpu(adev, 0);
+ }
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ if (!con)
+ return;
+
+ amdgpu_ras_disable_all_features(adev, 0);
+ /* Make sure all ras objects are disabled. */
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 1);
}
/* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 24c6e5fcda86..b2841195bd3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
+ struct bin_attribute badpages_attr;
/* block array */
struct ras_manager *objs;
@@ -177,6 +178,12 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
return ras && (ras->supported & (1 << block));
}
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+ unsigned int block);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
bool is_ce);
@@ -189,13 +196,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
bool is_baco)
{
- /* remove me when gpu reset works on vega20 A1. */
-#if 0
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
schedule_work(&ras->recovery_work);
-#endif
return 0;
}
@@ -257,7 +261,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
/* called in ip_init and ip_fini */
int amdgpu_ras_init(struct amdgpu_device *adev);
-void amdgpu_ras_post_init(struct amdgpu_device *adev);
int amdgpu_ras_fini(struct amdgpu_device *adev);
int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
@@ -273,10 +276,10 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
struct ras_common_if *head);
-int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
struct ras_fs_if *head);
-int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
struct ras_common_if *head);
int amdgpu_ras_error_query(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 8f5026c123ef..e5c83e164d82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -28,8 +28,9 @@
*/
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/debugfs.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"
@@ -281,6 +282,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
+ r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
+ if (r) {
+ dev_err(adev->dev,
+ "(%d) ring trail_fence_offs wb alloc failed\n", r);
+ return r;
+ }
+ ring->trail_fence_gpu_addr =
+ adev->wb.gpu_addr + (ring->trail_fence_offs * 4);
+ ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs];
+
r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
if (r) {
dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
@@ -399,7 +410,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
{
ktime_t deadline = ktime_add_us(ktime_get(), 10000);
- if (!ring->funcs->soft_recovery || !fence)
+ if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;
atomic_inc(&ring->adev->gpu_reset_counter);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d7fae2676269..4410c97ac9b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,8 +29,8 @@
#include <drm/drm_print.h>
/* max number of rings */
-#define AMDGPU_MAX_RINGS 23
-#define AMDGPU_MAX_GFX_RINGS 1
+#define AMDGPU_MAX_RINGS 24
+#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_COMPUTE_RINGS 8
#define AMDGPU_MAX_VCE_RINGS 3
#define AMDGPU_MAX_UVD_ENC_RINGS 2
@@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
uint32_t align_mask;
u32 nop;
bool support_64bit_ptrs;
+ bool no_user_fence;
unsigned vmhub;
unsigned extra_dw;
@@ -171,6 +172,7 @@ struct amdgpu_ring_funcs {
enum drm_sched_priority priority);
/* Try to soft recover the ring to make the fence signal */
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
+ int (*preempt_ib)(struct amdgpu_ring *ring);
};
struct amdgpu_ring {
@@ -205,6 +207,10 @@ struct amdgpu_ring {
unsigned fence_offs;
uint64_t current_ctx;
char name[16];
+ u32 trail_seq;
+ unsigned trail_fence_offs;
+ u64 trail_fence_gpu_addr;
+ volatile u32 *trail_fence_cpu_addr;
unsigned cond_exe_offs;
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
@@ -245,6 +251,7 @@ struct amdgpu_ring {
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
@@ -265,6 +272,12 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence);
+static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
+ bool cond_exec)
+{
+ *ring->cond_exe_cpu_addr = cond_exec;
+}
+
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index 49a8ab52113b..d3d4707f2168 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -26,6 +26,94 @@
#include "clearstate_defs.h"
+/* firmware ID used in rlc toc */
+typedef enum _FIRMWARE_ID_ {
+ FIRMWARE_ID_INVALID = 0,
+ FIRMWARE_ID_RLC_G_UCODE = 1,
+ FIRMWARE_ID_RLC_TOC = 2,
+ FIRMWARE_ID_RLCG_SCRATCH = 3,
+ FIRMWARE_ID_RLC_SRM_ARAM = 4,
+ FIRMWARE_ID_RLC_SRM_INDEX_ADDR = 5,
+ FIRMWARE_ID_RLC_SRM_INDEX_DATA = 6,
+ FIRMWARE_ID_RLC_P_UCODE = 7,
+ FIRMWARE_ID_RLC_V_UCODE = 8,
+ FIRMWARE_ID_RLX6_UCODE = 9,
+ FIRMWARE_ID_RLX6_DRAM_BOOT = 10,
+ FIRMWARE_ID_GLOBAL_TAP_DELAYS = 11,
+ FIRMWARE_ID_SE0_TAP_DELAYS = 12,
+ FIRMWARE_ID_SE1_TAP_DELAYS = 13,
+ FIRMWARE_ID_GLOBAL_SE0_SE1_SKEW_DELAYS = 14,
+ FIRMWARE_ID_SDMA0_UCODE = 15,
+ FIRMWARE_ID_SDMA0_JT = 16,
+ FIRMWARE_ID_SDMA1_UCODE = 17,
+ FIRMWARE_ID_SDMA1_JT = 18,
+ FIRMWARE_ID_CP_CE = 19,
+ FIRMWARE_ID_CP_PFP = 20,
+ FIRMWARE_ID_CP_ME = 21,
+ FIRMWARE_ID_CP_MEC = 22,
+ FIRMWARE_ID_CP_MES = 23,
+ FIRMWARE_ID_MES_STACK = 24,
+ FIRMWARE_ID_RLC_SRM_DRAM_SR = 25,
+ FIRMWARE_ID_RLCG_SCRATCH_SR = 26,
+ FIRMWARE_ID_RLCP_SCRATCH_SR = 27,
+ FIRMWARE_ID_RLCV_SCRATCH_SR = 28,
+ FIRMWARE_ID_RLX6_DRAM_SR = 29,
+ FIRMWARE_ID_SDMA0_PG_CONTEXT = 30,
+ FIRMWARE_ID_SDMA1_PG_CONTEXT = 31,
+ FIRMWARE_ID_GLOBAL_MUX_SELECT_RAM = 32,
+ FIRMWARE_ID_SE0_MUX_SELECT_RAM = 33,
+ FIRMWARE_ID_SE1_MUX_SELECT_RAM = 34,
+ FIRMWARE_ID_ACCUM_CTRL_RAM = 35,
+ FIRMWARE_ID_RLCP_CAM = 36,
+ FIRMWARE_ID_RLC_SPP_CAM_EXT = 37,
+ FIRMWARE_ID_MAX = 38,
+} FIRMWARE_ID;
+
+typedef struct _RLC_TABLE_OF_CONTENT {
+ union {
+ unsigned int DW0;
+ struct {
+ unsigned int offset : 25;
+ unsigned int id : 7;
+ };
+ };
+
+ union {
+ unsigned int DW1;
+ struct {
+ unsigned int load_at_boot : 1;
+ unsigned int load_at_vddgfx : 1;
+ unsigned int load_at_reset : 1;
+ unsigned int memory_destination : 2;
+ unsigned int vfflr_image_code : 4;
+ unsigned int load_mode_direct : 1;
+ unsigned int save_for_vddgfx : 1;
+ unsigned int save_for_vfflr : 1;
+ unsigned int reserved : 1;
+ unsigned int signed_source : 1;
+ unsigned int size : 18;
+ };
+ };
+
+ union {
+ unsigned int DW2;
+ struct {
+ unsigned int indirect_addr_reg : 16;
+ unsigned int index : 16;
+ };
+ };
+
+ union {
+ unsigned int DW3;
+ struct {
+ unsigned int indirect_data_reg : 16;
+ unsigned int indirect_start_offset : 16;
+ };
+ };
+} RLC_TABLE_OF_CONTENT;
+
+#define RLC_TOC_MAX_SIZE 64
+
struct amdgpu_rlc_funcs {
bool (*is_rlc_enabled)(struct amdgpu_device *adev);
void (*set_safe_mode)(struct amdgpu_device *adev);
@@ -85,6 +173,16 @@ struct amdgpu_rlc {
u8 *save_restore_list_srm;
bool is_rlc_v2_1;
+
+ /* for rlc autoload */
+ struct amdgpu_bo *rlc_autoload_bo;
+ u64 rlc_autoload_gpu_addr;
+ void *rlc_autoload_ptr;
+
+ /* rlc toc buffer */
+ struct amdgpu_bo *rlc_toc_bo;
+ uint64_t rlc_toc_gpu_addr;
+ void *rlc_toc_buf;
};
void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index bfaf5c6323be..0bd1d4ffc19e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -41,7 +41,7 @@
* If we are asked to block we wait on all the oldest fence of all
* rings. We just wait for any of those fence to complete.
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 639297250c21..c799691dfa84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -23,8 +23,11 @@
*/
#include <linux/fdtable.h>
+#include <linux/file.h>
#include <linux/pid.h>
+
#include <drm/amdgpu_drm.h>
+
#include "amdgpu.h"
#include "amdgpu_vm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
index 2a1a0c734bdd..12299fd95691 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -25,7 +25,10 @@
#ifndef __AMDGPU_SCHED_H__
#define __AMDGPU_SCHED_H__
-#include <drm/drmP.h>
+enum drm_sched_priority;
+
+struct drm_device;
+struct drm_file;
enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 115bb0c99b0f..5c13c503e61f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -20,10 +20,14 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_sdma.h"
+#define AMDGPU_CSA_SDMA_SIZE 64
+/* SDMA CSA reside in the 3rd page of CSA */
+#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+
/*
* GPU SDMA IP block helpers function.
*/
@@ -56,3 +60,26 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index)
return -EINVAL;
}
+
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
+ unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint64_t csa_mc_addr;
+ uint32_t index = 0;
+ int r;
+
+ if (vmid == 0 || !amdgpu_mcbp)
+ return 0;
+
+ r = amdgpu_sdma_get_index_from_ring(ring, &index);
+
+ if (r || index > 31)
+ csa_mc_addr = 0;
+ else
+ csa_mc_addr = amdgpu_csa_vaddr(adev) +
+ AMDGPU_CSA_SDMA_OFFSET +
+ index * AMDGPU_CSA_SDMA_SIZE;
+
+ return csa_mc_addr;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 1ba9ba3b54f7..35dd152f9d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -97,5 +97,5 @@ struct amdgpu_buffer_funcs {
struct amdgpu_sdma_instance *
amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring);
int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index);
-
+uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
new file mode 100644
index 000000000000..f4176cb01790
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_SOCBB_H__
+#define __AMDGPU_SOCBB_H__
+
+struct gpu_info_voltage_scaling_v1_0 {
+ uint32_t state;
+ uint32_t dscclk_mhz;
+ uint32_t dcfclk_mhz;
+ uint32_t socclk_mhz;
+ uint32_t dram_speed_mts;
+ uint32_t fabricclk_mhz;
+ uint32_t dispclk_mhz;
+ uint32_t phyclk_mhz;
+ uint32_t dppclk_mhz;
+};
+
+struct gpu_info_soc_bounding_box_v1_0 {
+ uint32_t sr_exit_time_us;
+ uint32_t sr_enter_plus_exit_time_us;
+ uint32_t urgent_latency_us;
+ uint32_t urgent_latency_pixel_data_only_us;
+ uint32_t urgent_latency_pixel_mixed_with_vm_data_us;
+ uint32_t urgent_latency_vm_data_only_us;
+ uint32_t writeback_latency_us;
+ uint32_t ideal_dram_bw_after_urgent_percent;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+ uint32_t pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+ uint32_t max_avg_sdp_bw_use_normal_percent;
+ uint32_t max_avg_dram_bw_use_normal_percent;
+ uint32_t max_request_size_bytes;
+ uint32_t downspread_percent;
+ uint32_t dram_page_open_time_ns;
+ uint32_t dram_rw_turnaround_time_ns;
+ uint32_t dram_return_buffer_per_channel_bytes;
+ uint32_t dram_channel_width_bytes;
+ uint32_t fabric_datapath_to_dcn_data_return_bytes;
+ uint32_t dcn_downspread_percent;
+ uint32_t dispclk_dppclk_vco_speed_mhz;
+ uint32_t dfs_vco_period_ps;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_only_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+ uint32_t urgent_out_of_order_return_per_channel_vm_only_bytes;
+ uint32_t round_trip_ping_latency_dcfclk_cycles;
+ uint32_t urgent_out_of_order_return_per_channel_bytes;
+ uint32_t channel_interleave_bytes;
+ uint32_t num_banks;
+ uint32_t num_chans;
+ uint32_t vmm_page_size_bytes;
+ uint32_t dram_clock_change_latency_us;
+ uint32_t writeback_dram_clock_change_latency_us;
+ uint32_t return_bus_width_bytes;
+ uint32_t voltage_override;
+ uint32_t xfc_bus_transport_time_us;
+ uint32_t xfc_xbuf_latency_tolerance_us;
+ uint32_t use_urgent_burst_bw;
+ uint32_t num_states;
+ struct gpu_info_voltage_scaling_v1_0 clock_limits[8];
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 2d6f5ec77a68..9828f3c7c655 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,7 +28,6 @@
* Christian König <christian.koenig@amd.com>
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 8904e62dca7a..b66d29d5ffa2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -22,7 +22,7 @@
*
* Authors: Michel Dänzer
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d3ca2424b5fe..77674a7b9616 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -28,8 +28,6 @@
#include <linux/types.h>
#include <linux/tracepoint.h>
-#include <drm/drmP.h>
-
#undef TRACE_SYSTEM
#define TRACE_SYSTEM amdgpu
#define TRACE_INCLUDE_FILE amdgpu_trace
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index f212402570a5..57c6c39ba064 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -21,7 +21,7 @@
*
* Author : Dave Airlie <airlied@redhat.com>
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..5c05644b9b96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,20 +29,26 @@
* Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
* Dave Airlie
*/
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/hmm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
-#include <linux/iommu.h>
+
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@@ -703,143 +709,183 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
/*
* TTM backend functions.
*/
-struct amdgpu_ttm_gup_task_list {
- struct list_head list;
- struct task_struct *task;
-};
-
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
struct task_struct *usertask;
uint32_t userflags;
- spinlock_t guptasklock;
- struct list_head guptasks;
- atomic_t mmu_invalidations;
- uint32_t last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ struct hmm_range *range;
+#endif
};
/**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
*
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
*/
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+#define MAX_RETRY_HMM_RANGE_FAULT 16
+
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
+ struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
+ struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
- int r;
+ unsigned long start = gtt->userptr;
+ struct vm_area_struct *vma;
+ struct hmm_range *range;
+ unsigned long i;
+ uint64_t *pfns;
+ int retry = 0;
+ int r = 0;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
+ if (unlikely(!mirror)) {
+ DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
+ r = -EFAULT;
+ goto out;
+ }
- down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out;
+ }
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /*
- * check that we only use anonymous memory to prevent problems
- * with writeback
- */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (unlikely(!range)) {
+ r = -ENOMEM;
+ goto out;
+ }
- vma = find_vma(mm, gtt->userptr);
- if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&mm->mmap_sem);
- return -EPERM;
- }
+ pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_ranges;
}
- /* loop enough times using contiguous pages of memory */
- do {
- unsigned num_pages = ttm->num_pages - pinned;
- uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **p = pages + pinned;
- struct amdgpu_ttm_gup_task_list guptask;
+ amdgpu_hmm_init_range(range);
+ range->default_flags = range->flags[HMM_PFN_VALID];
+ range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
+ 0 : range->flags[HMM_PFN_WRITE];
+ range->pfn_flags_mask = 0;
+ range->pfns = pfns;
+ hmm_range_register(range, mirror, start,
+ start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+
+retry:
+ /*
+ * Just wait for range to be valid, safe to ignore return value as we
+ * will use the return value of hmm_range_fault() below under the
+ * mmap_sem to ascertain the validity of the range.
+ */
+ hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+
+ down_read(&mm->mmap_sem);
- guptask.task = current;
- spin_lock(&gtt->guptasklock);
- list_add(&guptask.list, &gtt->guptasks);
- spin_unlock(&gtt->guptasklock);
+ r = hmm_range_fault(range, true);
+ if (unlikely(r < 0)) {
+ if (likely(r == -EAGAIN)) {
+ /*
+ * return -EAGAIN, mmap_sem is dropped
+ */
+ if (retry++ < MAX_RETRY_HMM_RANGE_FAULT)
+ goto retry;
+ else
+ pr_err("Retry hmm fault too many times\n");
+ }
- if (mm == current->mm)
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
- else
- r = get_user_pages_remote(gtt->usertask,
- mm, userptr, num_pages,
- flags, p, NULL, NULL);
+ goto out_up_read;
+ }
- spin_lock(&gtt->guptasklock);
- list_del(&guptask.list);
- spin_unlock(&gtt->guptasklock);
+ up_read(&mm->mmap_sem);
- if (r < 0)
- goto release_pages;
+ for (i = 0; i < ttm->num_pages; i++) {
+ pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ if (unlikely(!pages[i])) {
+ pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+ i, pfns[i]);
+ r = -ENOMEM;
- pinned += r;
+ goto out_free_pfns;
+ }
+ }
- } while (pinned < ttm->num_pages);
+ gtt->range = range;
- up_read(&mm->mmap_sem);
return 0;
-release_pages:
- release_pages(pages, pinned);
- up_read(&mm->mmap_sem);
+out_up_read:
+ if (likely(r != -EAGAIN))
+ up_read(&mm->mmap_sem);
+out_free_pfns:
+ hmm_range_unregister(range);
+ kvfree(pfns);
+out_free_ranges:
+ kfree(range);
+out:
return r;
}
/**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
*
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are still valid
*/
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
+ bool r = false;
+
+ if (!gtt || !gtt->userptr)
+ return false;
- gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
- for (i = 0; i < ttm->num_pages; ++i) {
- if (ttm->pages[i])
- put_page(ttm->pages[i]);
+ DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n",
+ gtt->userptr, ttm->num_pages);
- ttm->pages[i] = pages ? pages[i] : NULL;
+ WARN_ONCE(!gtt->range || !gtt->range->pfns,
+ "No user pages to check\n");
+
+ if (gtt->range) {
+ r = hmm_range_valid(gtt->range);
+ hmm_range_unregister(gtt->range);
+
+ kvfree(gtt->range->pfns);
+ kfree(gtt->range);
+ gtt->range = NULL;
}
+
+ return r;
}
+#endif
/**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
*
- * Called while unpinning userptr pages
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
*/
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned i;
-
- for (i = 0; i < ttm->num_pages; ++i) {
- struct page *page = ttm->pages[i];
+ unsigned long i;
- if (!page)
- continue;
-
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- set_page_dirty(page);
-
- mark_page_accessed(page);
- }
+ for (i = 0; i < ttm->num_pages; ++i)
+ ttm->pages[i] = pages ? pages[i] : NULL;
}
/**
@@ -901,10 +947,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
/* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- /* mark the pages as dirty */
- amdgpu_ttm_tt_mark_user_pages(ttm);
-
sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+ if (gtt->range &&
+ ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[0]))
+ WARN_ONCE(1, "Missing get_user_page_done\n");
+#endif
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -925,8 +975,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
goto gart_bind_fail;
/* Patch mtype of the second part BO */
- flags &= ~AMDGPU_PTE_MTYPE_MASK;
- flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
r = amdgpu_gart_bind(adev,
gtt->offset + (page_idx << PAGE_SHIFT),
@@ -1254,11 +1304,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->usertask = current->group_leader;
get_task_struct(gtt->usertask);
- spin_lock_init(&gtt->guptasklock);
- INIT_LIST_HEAD(&gtt->guptasks);
- atomic_set(&gtt->mmu_invalidations, 0);
- gtt->last_set_pages = 0;
-
return 0;
}
@@ -1287,7 +1332,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1344,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
- /* Search the lists of tasks that hold this mapping and see
- * if current is one of them. If it is return false.
- */
- spin_lock(&gtt->guptasklock);
- list_for_each_entry(entry, &gtt->guptasks, list) {
- if (entry->task == current) {
- spin_unlock(&gtt->guptasklock);
- return false;
- }
- }
- spin_unlock(&gtt->guptasklock);
-
- atomic_inc(&gtt->mmu_invalidations);
-
return true;
}
/**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
- int *last_invalidated)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int prev_invalidated = *last_invalidated;
-
- *last_invalidated = atomic_read(&gtt->mmu_invalidations);
- return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
*/
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL || !gtt->userptr)
return false;
- return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+ return true;
}
/**
@@ -1753,44 +1769,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Initialize various on-chip memory pools */
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
- adev->gds.mem.total_size);
+ adev->gds.gds_size);
if (r) {
DRM_ERROR("Failed initializing GDS heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- 4, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
- adev->gds.gws.total_size);
+ adev->gds.gws_size);
if (r) {
DRM_ERROR("Failed initializing gws heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
- adev->gds.oa.total_size);
+ adev->gds.oa_size);
if (r) {
DRM_ERROR("Failed initializing oa heap.\n");
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- 1, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..caa76c693700 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -101,9 +101,22 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
-int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+#else
+static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
+ struct page **pages)
+{
+ return -EPERM;
+}
+static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+ return false;
+}
+#endif
+
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@@ -112,7 +125,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7b33867036e7..c352a519ddd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -24,7 +24,7 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
@@ -77,6 +77,14 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)
container_of(hdr, struct smc_firmware_header_v1_0, header);
DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr));
+ } else if (version_major == 2) {
+ const struct smc_firmware_header_v1_0 *v1_hdr =
+ container_of(hdr, struct smc_firmware_header_v1_0, header);
+ const struct smc_firmware_header_v2_0 *v2_hdr =
+ container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0);
+
+ DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes));
+ DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes));
} else {
DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);
}
@@ -227,6 +235,40 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
}
}
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)
+{
+ uint16_t version_major = le16_to_cpu(hdr->header_version_major);
+ uint16_t version_minor = le16_to_cpu(hdr->header_version_minor);
+
+ DRM_DEBUG("PSP\n");
+ amdgpu_ucode_print_common_hdr(hdr);
+
+ if (version_major == 1) {
+ const struct psp_firmware_header_v1_0 *psp_hdr =
+ container_of(hdr, struct psp_firmware_header_v1_0, header);
+
+ DRM_DEBUG("ucode_feature_version: %u\n",
+ le32_to_cpu(psp_hdr->ucode_feature_version));
+ DRM_DEBUG("sos_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos_offset_bytes));
+ DRM_DEBUG("sos_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr->sos_size_bytes));
+ if (version_minor == 1) {
+ const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 =
+ container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0);
+ DRM_DEBUG("toc_header_version: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc_header_version));
+ DRM_DEBUG("toc_offset_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc_offset_bytes));
+ DRM_DEBUG("toc_size_bytes: %u\n",
+ le32_to_cpu(psp_hdr_v1_1->toc_size_bytes));
+ }
+ } else {
+ DRM_ERROR("Unknown PSP ucode version: %u.%u\n",
+ version_major, version_minor);
+ }
+}
+
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr)
{
uint16_t version_major = le16_to_cpu(hdr->header_version_major);
@@ -302,6 +344,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_RAVEN:
case CHIP_VEGA12:
case CHIP_VEGA20:
+ case CHIP_NAVI10:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
@@ -313,6 +356,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_DIRECT;
}
+#define FW_VERSION_ATTR(name, mode, field) \
+static ssize_t show_##name(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct drm_device *ddev = dev_get_drvdata(dev); \
+ struct amdgpu_device *adev = ddev->dev_private; \
+ \
+ return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field); \
+} \
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+
+static struct attribute *fw_attrs[] = {
+ &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
+ &dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
+ &dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
+ &dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
+ &dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
+ &dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
+ &dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
+ &dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
+ &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
+ &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
+ &dev_attr_dmcu_fw_version.attr, NULL
+};
+
+static const struct attribute_group fw_attr_group = {
+ .name = "fw_version",
+ .attrs = fw_attrs
+};
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+ return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+ sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+}
+
static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
struct amdgpu_firmware_info *ucode,
uint64_t mc_addr, void *kptr)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 7ac25a1c7853..f46944453c6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -23,6 +23,8 @@
#ifndef __AMDGPU_UCODE_H__
#define __AMDGPU_UCODE_H__
+#include "amdgpu_socbb.h"
+
struct common_firmware_header {
uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
uint32_t header_size_bytes; /* size of just the header in bytes */
@@ -49,6 +51,26 @@ struct smc_firmware_header_v1_0 {
uint32_t ucode_start_addr;
};
+/* version_major=2, version_minor=0 */
+struct smc_firmware_header_v2_0 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t ppt_offset_bytes; /* soft pptable offset */
+ uint32_t ppt_size_bytes; /* soft pptable size */
+};
+
+struct smc_soft_pptable_entry {
+ uint32_t id;
+ uint32_t ppt_offset_bytes;
+ uint32_t ppt_size_bytes;
+};
+
+/* version_major=2, version_minor=1 */
+struct smc_firmware_header_v2_1 {
+ struct smc_firmware_header_v1_0 v1_0;
+ uint32_t pptable_count;
+ uint32_t pptable_entry_offset;
+};
+
/* version_major=1, version_minor=0 */
struct psp_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -57,6 +79,14 @@ struct psp_firmware_header_v1_0 {
uint32_t sos_size_bytes;
};
+/* version_major=1, version_minor=1 */
+struct psp_firmware_header_v1_1 {
+ struct psp_firmware_header_v1_0 v1_0;
+ uint32_t toc_header_version;
+ uint32_t toc_offset_bytes;
+ uint32_t toc_size_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct ta_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -77,6 +107,21 @@ struct gfx_firmware_header_v1_0 {
};
/* version_major=1, version_minor=0 */
+struct mes_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t mes_ucode_version;
+ uint32_t mes_ucode_size_bytes;
+ uint32_t mes_ucode_offset_bytes;
+ uint32_t mes_ucode_data_version;
+ uint32_t mes_ucode_data_size_bytes;
+ uint32_t mes_ucode_data_offset_bytes;
+ uint32_t mes_uc_start_addr_lo;
+ uint32_t mes_uc_start_addr_hi;
+ uint32_t mes_data_start_addr_lo;
+ uint32_t mes_data_start_addr_hi;
+};
+
+/* version_major=1, version_minor=0 */
struct rlc_firmware_header_v1_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
@@ -161,6 +206,19 @@ struct gpu_info_firmware_v1_0 {
uint32_t gc_lds_size;
};
+struct gpu_info_firmware_v1_1 {
+ struct gpu_info_firmware_v1_0 v1_0;
+ uint32_t num_sc_per_sh;
+ uint32_t num_packer_per_sc;
+};
+
+/* gpu info payload
+ * version_major=1, version_minor=1 */
+struct gpu_info_firmware_v1_2 {
+ struct gpu_info_firmware_v1_1 v1_1;
+ struct gpu_info_soc_bounding_box_v1_0 soc_bounding_box;
+};
+
/* version_major=1, version_minor=0 */
struct gpu_info_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -180,7 +238,9 @@ union amdgpu_firmware_header {
struct common_firmware_header common;
struct mc_firmware_header_v1_0 mc;
struct smc_firmware_header_v1_0 smc;
+ struct smc_firmware_header_v2_0 smc_v2_0;
struct psp_firmware_header_v1_0 psp;
+ struct psp_firmware_header_v1_1 psp_v1_1;
struct ta_firmware_header_v1_0 ta;
struct gfx_firmware_header_v1_0 gfx;
struct rlc_firmware_header_v1_0 rlc;
@@ -206,6 +266,8 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC1_JT,
AMDGPU_UCODE_ID_CP_MEC2,
AMDGPU_UCODE_ID_CP_MEC2_JT,
+ AMDGPU_UCODE_ID_CP_MES,
+ AMDGPU_UCODE_ID_CP_MES_DATA,
AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
@@ -218,6 +280,8 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_VCN,
AMDGPU_UCODE_ID_DMCU_ERAM,
AMDGPU_UCODE_ID_DMCU_INTV,
+ AMDGPU_UCODE_ID_VCN0_RAM,
+ AMDGPU_UCODE_ID_VCN1_RAM,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -232,6 +296,7 @@ enum amdgpu_firmware_load_type {
AMDGPU_FW_LOAD_DIRECT = 0,
AMDGPU_FW_LOAD_SMU,
AMDGPU_FW_LOAD_PSP,
+ AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO,
};
/* conform to smu_ucode_xfer_cz.h */
@@ -284,6 +349,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr);
+void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr);
void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
int amdgpu_ucode_validate(const struct firmware *fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
@@ -291,7 +357,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 4e5d13e41f6a..5b2fea3b4a2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -30,7 +30,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+
#include <drm/drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index f7189e22f6b7..b70b3c45bb29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -27,7 +27,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+
#include <drm/drm.h>
#include "amdgpu.h"
@@ -1092,7 +1092,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
for (i = 0; i < timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index e6b07ece3910..2e12eeb314a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -26,7 +26,8 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include <drm/drm.h>
#include "amdgpu.h"
@@ -45,10 +46,12 @@
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
+#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
+MODULE_FIRMWARE(FIRMWARE_NAVI10);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -71,6 +74,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
else
fw_name = FIRMWARE_RAVEN;
break;
+ case CHIP_NAVI10:
+ fw_name = FIRMWARE_NAVI10;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = true;
+ break;
default:
return -EINVAL;
}
@@ -132,6 +141,16 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
return r;
}
+ if (adev->vcn.indirect_sram) {
+ r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo,
+ &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r);
+ return r;
+ }
+ }
+
return 0;
}
@@ -141,6 +160,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
kvfree(adev->vcn.saved_bo);
+ if (adev->vcn.indirect_sram) {
+ amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo,
+ &adev->vcn.dpg_sram_gpu_addr,
+ (void **)&adev->vcn.dpg_sram_cpu_addr);
+ }
+
amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr,
(void **)&adev->vcn.cpu_addr);
@@ -212,132 +237,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
return 0;
}
-static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
- struct dpg_pause_state *new_state)
-{
- int ret_code;
- uint32_t reg_data = 0;
- uint32_t reg_data2 = 0;
- struct amdgpu_ring *ring;
-
- /* pause/unpause if state is changed */
- if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
- DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
- new_state->fw_based, new_state->jpeg);
-
- reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
- (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
-
- if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
-
- if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
- if (!ret_code) {
- /* pause DPG non-jpeg */
- reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
-
- /* Restore */
- ring = &adev->vcn.ring_enc[0];
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-
- ring = &adev->vcn.ring_enc[1];
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
- WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
- WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-
- ring = &adev->vcn.ring_dec;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
- }
- } else {
- /* unpause dpg non-jpeg, no need to wait */
- reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- }
- adev->vcn.pause_state.fw_based = new_state->fw_based;
- }
-
- /* pause/unpause if state is changed */
- if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
- DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
- adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
- new_state->fw_based, new_state->jpeg);
-
- reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
- (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
-
- if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
- ret_code = 0;
-
- if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
- if (!ret_code) {
- /* Make sure JPRG Snoop is disabled before sending the pause */
- reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
- reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
-
- /* pause DPG jpeg */
- reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
- UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
- UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
-
- /* Restore */
- ring = &adev->vcn.ring_jpeg;
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
- UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
- WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
- UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
- ring = &adev->vcn.ring_dec;
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
- RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
- SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
- UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
- }
- } else {
- /* unpause dpg jpeg, no need to wait */
- reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
- WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
- }
- adev->vcn.pause_state.jpeg = new_state->jpeg;
- }
-
- return 0;
-}
-
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
@@ -362,7 +261,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
else
new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
- amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, &new_state);
}
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
@@ -370,7 +269,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (fences == 0) {
amdgpu_gfx_off_ctrl(adev, true);
- if (adev->pm.dpm_enabled)
+ if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
@@ -387,7 +286,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
if (set_clocks) {
amdgpu_gfx_off_ctrl(adev, false);
- if (adev->pm.dpm_enabled)
+ if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
@@ -417,7 +316,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
new_state.jpeg = VCN_DPG_STATE__PAUSE;
- amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ adev->vcn.pause_dpg_mode(adev, &new_state);
}
}
@@ -433,20 +332,18 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
+ WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
-
- amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
+ tmp = RREG32(adev->vcn.external.scratch9);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -472,14 +369,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
- ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
+ ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
ib->ptr[1] = addr;
- ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
+ ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
ib->ptr[3] = addr >> 32;
- ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
+ ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
ib->ptr[5] = 0;
for (i = 6; i < 16; i += 2) {
- ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
+ ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
@@ -610,7 +507,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -754,22 +651,20 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
+ WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
-
if (r)
return r;
- amdgpu_ring_write(ring,
- PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
+ amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
+ tmp = RREG32(adev->vcn.external.jpeg_pitch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -794,7 +689,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
- ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
+ ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -840,10 +735,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
+ tmp = RREG32(adev->vcn.external.jpeg_pitch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a0ad19af9080..99f14fcc1460 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -25,7 +25,7 @@
#define __AMDGPU_VCN_H__
#define AMDGPU_VCN_STACK_SIZE (128*1024)
-#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
+#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
@@ -45,8 +45,81 @@
#define VCN_ENC_CMD_REG_WRITE 0x0000000b
#define VCN_ENC_CMD_REG_WAIT 0x0000000c
+#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
+#define VCN_AON_SOC_ADDRESS_2_0 0x1f800
+#define VCN_VID_IP_ADDRESS_2_0 0x0
+#define VCN_AON_IP_ADDRESS_2_0 0x30000
+
+#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+ do { \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
+#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \
+ ({ \
+ uint32_t internal_reg_offset, addr; \
+ bool video_range, aon_range; \
+ \
+ addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+ addr <<= 2; \
+ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \
+ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \
+ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \
+ if (video_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \
+ (VCN_VID_IP_ADDRESS_2_0)); \
+ else if (aon_range) \
+ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \
+ (VCN_AON_IP_ADDRESS_2_0)); \
+ else \
+ internal_reg_offset = (0xFFFFF & addr); \
+ \
+ internal_reg_offset >>= 2; \
+ })
+
+#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \
+ ({ \
+ WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
+ (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \
+ })
+
+#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \
+ do { \
+ if (!indirect) { \
+ WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \
+ (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \
+ mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \
+ offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \
+ } else { \
+ *adev->vcn.dpg_sram_curr_addr++ = offset; \
+ *adev->vcn.dpg_sram_curr_addr++ = value; \
+ } \
+ } while (0)
+
enum engine_status_constants {
UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0,
UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
UVD_STATUS__UVD_BUSY = 0x00000004,
GB_ADDR_CONFIG_DEFAULT = 0x26010011,
@@ -54,6 +127,7 @@ enum engine_status_constants {
UVD_STATUS__BUSY = 0x5,
UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
UVD_STATUS__RBC_BUSY = 0x1,
+ UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0,
};
enum internal_dpg_state {
@@ -66,6 +140,15 @@ struct dpg_pause_state {
enum internal_dpg_state jpeg;
};
+struct amdgpu_vcn_reg{
+ unsigned data0;
+ unsigned data1;
+ unsigned cmd;
+ unsigned nop;
+ unsigned scratch9;
+ unsigned jpeg_pitch;
+};
+
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
@@ -81,6 +164,15 @@ struct amdgpu_vcn {
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
struct dpg_pause_state pause_state;
+ struct amdgpu_vcn_reg internal, external;
+ int (*pause_dpg_mode)(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state);
+
+ bool indirect_sram;
+ struct amdgpu_bo *dpg_sram_bo;
+ void *dpg_sram_cpu_addr;
+ uint64_t dpg_sram_gpu_addr;
+ uint32_t *dpg_sram_curr_addr;
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7d484fad3909..07a7e3820b7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -21,6 +21,10 @@
*
*/
+#include <linux/module.h>
+
+#include <drm/drm_drv.h>
+
#include "amdgpu.h"
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
@@ -426,3 +430,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
return clk;
}
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (virt->ops && virt->ops->init_reg_access_mode)
+ virt->ops->init_reg_access_mode(adev);
+}
+
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
+ ret = true;
+
+ return ret;
+}
+
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
+ && !(amdgpu_sriov_runtime(adev)))
+ ret = true;
+
+ return ret;
+}
+
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
+{
+ bool ret = false;
+ struct amdgpu_virt *virt = &adev->virt;
+
+ if (amdgpu_sriov_vf(adev)
+ && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
+ ret = true;
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 584947b7ccf3..f5107731e9c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
};
+/* According to the fw feature, some new reg access modes are supported */
+#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */
+#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */
+#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */
+#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */
+
/**
* struct amdgpu_virt_ops - amdgpu device virt operations
*/
@@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
+ void (*init_reg_access_mode)(struct amdgpu_device *adev);
};
/*
@@ -246,6 +253,7 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ;
struct amdgpu_virt {
uint32_t caps;
struct amdgpu_bo *csa_obj;
+ void *csa_cpu_addr;
bool chained_ib_support;
uint32_t reg_val_offs;
struct amdgpu_irq_src ack_irq;
@@ -258,6 +266,7 @@ struct amdgpu_virt {
uint32_t gim_feature;
/* protect DPM events to GIM */
struct mutex dpm_mutex;
+ uint32_t reg_access_mode;
};
#define amdgpu_sriov_enabled(adev) \
@@ -307,4 +316,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4f10f5aba00b..24c3c05e2fb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -28,7 +28,7 @@
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -1574,12 +1574,22 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
flags &= ~AMDGPU_PTE_EXECUTABLE;
flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
- flags &= ~AMDGPU_PTE_MTYPE_MASK;
- flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);
+ if (adev->asic_type == CHIP_NAVI10) {
+ flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
+ flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
+ } else {
+ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
+ flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK);
+ }
if ((mapping->flags & AMDGPU_PTE_PRT) &&
(adev->asic_type >= CHIP_VEGA10)) {
flags |= AMDGPU_PTE_PRT;
+ if (adev->asic_type >= CHIP_NAVI10) {
+ flags |= AMDGPU_PTE_SNOOPED;
+ flags |= AMDGPU_PTE_LOG;
+ flags |= AMDGPU_PTE_SYSTEM;
+ }
flags &= ~AMDGPU_PTE_VALID;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 91baf95212a6..489a162ca620 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -67,6 +67,8 @@ struct amdgpu_bo_list_entry;
/* PDE is handled as PTE for VEGA10 */
#define AMDGPU_PDE_PTE (1ULL << 54)
+#define AMDGPU_PTE_LOG (1ULL << 55)
+
/* PTE is handled as PDE for VEGA10 (Translate Further) */
#define AMDGPU_PTE_TF (1ULL << 56)
@@ -75,8 +77,8 @@ struct amdgpu_bo_list_entry;
/* For GFX9 */
-#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
-#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
+#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
+#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
@@ -86,7 +88,11 @@ struct amdgpu_bo_list_entry;
| AMDGPU_PTE_EXECUTABLE \
| AMDGPU_PTE_READABLE \
| AMDGPU_PTE_WRITEABLE \
- | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
+ | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
+
+/* NAVI10 only */
+#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
+#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
/* How to programm VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index ec9ea3fdbb4a..3a9d8c15fe9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -22,7 +22,6 @@
* Authors: Christian König
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
struct amdgpu_vram_mgr {
@@ -276,7 +275,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t usage = 0, vis_usage = 0;
+ uint64_t vis_usage = 0, mem_bytes;
unsigned i;
int r;
@@ -284,20 +283,34 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (!lpfn)
lpfn = man->size;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
- amdgpu_vram_page_split == -1) {
+ /* bail out quickly if there's likely not enough VRAM for this BO */
+ mem_bytes = (u64)mem->num_pages << PAGE_SHIFT;
+ if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) {
+ atomic64_sub(mem_bytes, &mgr->usage);
+ mem->mm_node = NULL;
+ return 0;
+ }
+
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
pages_per_node = ~0ul;
num_nodes = 1;
} else {
- pages_per_node = max((uint32_t)amdgpu_vram_page_split,
- mem->page_alignment);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pages_per_node = HPAGE_PMD_NR;
+#else
+ /* default to 2MB */
+ pages_per_node = (2UL << (20UL - PAGE_SHIFT));
+#endif
+ pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment);
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
}
- nodes = kvmalloc_array(num_nodes, sizeof(*nodes),
+ nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
GFP_KERNEL | __GFP_ZERO);
- if (!nodes)
+ if (!nodes) {
+ atomic64_sub(mem_bytes, &mgr->usage);
return -ENOMEM;
+ }
mode = DRM_MM_INSERT_BEST;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
@@ -317,7 +330,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (unlikely(r))
break;
- usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
@@ -337,14 +349,12 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
if (unlikely(r))
goto error;
- usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
spin_unlock(&mgr->lock);
- atomic64_add(usage, &mgr->usage);
atomic64_add(vis_usage, &mgr->vis_usage);
mem->mm_node = nodes;
@@ -355,6 +365,7 @@ error:
while (i--)
drm_mm_remove_node(&nodes[i]);
spin_unlock(&mgr->lock);
+ atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage);
kvfree(nodes);
return r == -ENOSPC ? 0 : r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a48c84c51775..d11eba09eadd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
return &hive->device_list;
}
+/**
+ * DOC: AMDGPU XGMI Support
+ *
+ * XGMI is a high speed interconnect that joins multiple GPU cards
+ * into a homogeneous memory space that is organized by a collective
+ * hive ID and individual node IDs, both of which are 64-bit numbers.
+ *
+ * The file xgmi_device_id contains the unique per GPU device ID and
+ * is stored in the /sys/class/drm/card${cardno}/device/ directory.
+ *
+ * Inside the device directory a sub-directory 'xgmi_hive_info' is
+ * created which contains the hive ID and the list of nodes.
+ *
+ * The hive ID is stored in:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
+ *
+ * The node information is stored in numbered directories:
+ * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
+ *
+ * Each device has their own xgmi_hive_info direction with a mirror
+ * set of node sub-directories.
+ *
+ * The XGMI memory space is built by contiguously adding the power of
+ * two padded VRAM space from each node to each other.
+ *
+ */
+
+
static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
hive->number_devices,
- &hive->topology_info);
+ &adev->psp.xgmi_context.top_info);
if (ret)
dev_err(adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
return ret;
}
+
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_hops;
+ return -EINVAL;
+}
+
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
- struct psp_xgmi_topology_info *hive_topology;
+ struct psp_xgmi_topology_info *top_info;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev = NULL;
@@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
- hive_topology = &hive->topology_info;
+ top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
- hive_topology->nodes[count++].node_id = entry->node_id;
+ top_info->nodes[count++].node_id = entry->node_id;
+ top_info->num_nodes = count;
hive->number_devices = count;
- /* Each psp need to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+ /* update node list for other device in the hive */
+ if (tmp_adev != adev) {
+ top_info = &tmp_adev->psp.xgmi_context.top_info;
+ top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
+ top_info->num_nodes = count;
+ }
+ ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+ if (ret)
+ goto exit;
+ }
+
+ /* get latest topology info for each device from psp */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+ &tmp_adev->psp.xgmi_context.top_info);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
tmp_adev->gmc.xgmi.node_id,
tmp_adev->gmc.xgmi.hive_id, ret);
/* To do : continue with some node failed or disable the whole hive */
- break;
+ goto exit;
}
}
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
- if (ret)
- break;
- }
-
if (!ret)
ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
+
+ mutex_unlock(&hive->hive_lock);
+exit:
if (!ret)
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
@@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
ret);
-
- mutex_unlock(&hive->hive_lock);
-exit:
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 3e9c91e9a4bf..fbcee31788c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
- struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock, reset_lock;
struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
new file mode 100644
index 000000000000..89b32b6b81c8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "athub_v2_0.h"
+
+#include "athub/athub_2_0_0_offset.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+static void
+athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+ data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+static void
+athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+ data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+ else
+ data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
+}
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ athub_v2_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ athub_v2_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_ATHUB_MGCG */
+ data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
+ if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
+
+ /* AMD_CG_SUPPORT_ATHUB_LS */
+ if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_ATHUB_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
new file mode 100644
index 000000000000..02932c1c8bab
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __ATHUB_V2_0_H__
+#define __ATHUB_V2_0_H__
+
+int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index a39170991afe..4205bbe5d8d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -26,7 +26,8 @@
#define ATOM_H
#include <linux/types.h>
-#include <drm/drmP.h>
+
+struct drm_device;
#define ATOM_BIOS_MAGIC 0xAA55
#define ATOM_ATI_MAGIC_PTR 0x30
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8a0818b23ea4..213e62a28ba0 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_fixed.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index f81068ba4cc6..6858cde9fc5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -24,7 +24,7 @@
* Alex Deucher
* Jerome Glisse
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 60e2447e12c5..1e94a9b652f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -23,7 +23,9 @@
* Authors: Dave Airlie
* Alex Deucher
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index f9b2ce9a98f3..980c363b1a0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -22,7 +22,7 @@
* Authors: Alex Deucher
*
*/
-#include <drm/drmP.h>
+
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07c1f239e9c3..1ffbc0d3d7a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -24,7 +24,8 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -1804,6 +1805,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1821,6 +1834,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
+ .get_pcie_replay_count = &cik_get_pcie_replay_count,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 721c757156e8..401c99f0b2d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index d42808b05971..c45304f1047c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -21,8 +21,10 @@
*
* Authors: Alex Deucher
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -640,7 +642,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
new file mode 100644
index 000000000000..27a2ff0a07d2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h
@@ -0,0 +1,975 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int gfx10_SECT_CONTEXT_def_1[] = {
+ 0x00000000, // DB_RENDER_CONTROL
+ 0x00000000, // DB_COUNT_CONTROL
+ 0x00000000, // DB_DEPTH_VIEW
+ 0x00000000, // DB_RENDER_OVERRIDE
+ 0x00000000, // DB_RENDER_OVERRIDE2
+ 0x00000000, // DB_HTILE_DATA_BASE
+ 0x00000000, // HOLE
+ 0x00000000, // DB_DEPTH_SIZE_XY
+ 0x00000000, // DB_DEPTH_BOUNDS_MIN
+ 0x00000000, // DB_DEPTH_BOUNDS_MAX
+ 0x00000000, // DB_STENCIL_CLEAR
+ 0x00000000, // DB_DEPTH_CLEAR
+ 0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+ 0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+ 0x00000000, // DB_DFSM_CONTROL
+ 0x00000000, // DB_DEPTH_INFO
+ 0x00000000, // DB_Z_INFO
+ 0x00000000, // DB_STENCIL_INFO
+ 0x00000000, // DB_Z_READ_BASE
+ 0x00000000, // DB_STENCIL_READ_BASE
+ 0x00000000, // DB_Z_WRITE_BASE
+ 0x00000000, // DB_STENCIL_WRITE_BASE
+ 0x00000000, // DB_DEPTH_SIZE
+ 0x00000000, // DB_DEPTH_SLICE
+ 0x00000000, // DB_Z_INFO2
+ 0x00000000, // DB_STENCIL_INFO2
+ 0x00000000, // DB_Z_READ_BASE_HI
+ 0x00000000, // DB_STENCIL_READ_BASE_HI
+ 0x00000000, // DB_Z_WRITE_BASE_HI
+ 0x00000000, // DB_STENCIL_WRITE_BASE_HI
+ 0x00000000, // DB_HTILE_DATA_BASE_HI
+ 0x00150055, // DB_RMI_L2_CACHE_CONTROL
+ 0x00000000, // TA_BC_BASE_ADDR
+ 0x00000000, // TA_BC_BASE_ADDR_HI
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // COHER_DEST_BASE_HI_0
+ 0x00000000, // COHER_DEST_BASE_HI_1
+ 0x00000000, // COHER_DEST_BASE_HI_2
+ 0x00000000, // COHER_DEST_BASE_HI_3
+ 0x00000000, // COHER_DEST_BASE_2
+ 0x00000000, // COHER_DEST_BASE_3
+ 0x00000000, // PA_SC_WINDOW_OFFSET
+ 0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+ 0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+ 0x0000ffff, // PA_SC_CLIPRECT_RULE
+ 0x00000000, // PA_SC_CLIPRECT_0_TL
+ 0x40004000, // PA_SC_CLIPRECT_0_BR
+ 0x00000000, // PA_SC_CLIPRECT_1_TL
+ 0x40004000, // PA_SC_CLIPRECT_1_BR
+ 0x00000000, // PA_SC_CLIPRECT_2_TL
+ 0x40004000, // PA_SC_CLIPRECT_2_BR
+ 0x00000000, // PA_SC_CLIPRECT_3_TL
+ 0x40004000, // PA_SC_CLIPRECT_3_BR
+ 0xaa99aaaa, // PA_SC_EDGERULE
+ 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0xffffffff, // CB_TARGET_MASK
+ 0xffffffff, // CB_SHADER_MASK
+ 0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+ 0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+ 0x00000000, // COHER_DEST_BASE_0
+ 0x00000000, // COHER_DEST_BASE_1
+ 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+ 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+ 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+ 0x00000000, // PA_SC_VPORT_ZMIN_0
+ 0x3f800000, // PA_SC_VPORT_ZMAX_0
+ 0x00000000, // PA_SC_VPORT_ZMIN_1
+ 0x3f800000, // PA_SC_VPORT_ZMAX_1
+ 0x00000000, // PA_SC_VPORT_ZMIN_2
+ 0x3f800000, // PA_SC_VPORT_ZMAX_2
+ 0x00000000, // PA_SC_VPORT_ZMIN_3
+ 0x3f800000, // PA_SC_VPORT_ZMAX_3
+ 0x00000000, // PA_SC_VPORT_ZMIN_4
+ 0x3f800000, // PA_SC_VPORT_ZMAX_4
+ 0x00000000, // PA_SC_VPORT_ZMIN_5
+ 0x3f800000, // PA_SC_VPORT_ZMAX_5
+ 0x00000000, // PA_SC_VPORT_ZMIN_6
+ 0x3f800000, // PA_SC_VPORT_ZMAX_6
+ 0x00000000, // PA_SC_VPORT_ZMIN_7
+ 0x3f800000, // PA_SC_VPORT_ZMAX_7
+ 0x00000000, // PA_SC_VPORT_ZMIN_8
+ 0x3f800000, // PA_SC_VPORT_ZMAX_8
+ 0x00000000, // PA_SC_VPORT_ZMIN_9
+ 0x3f800000, // PA_SC_VPORT_ZMAX_9
+ 0x00000000, // PA_SC_VPORT_ZMIN_10
+ 0x3f800000, // PA_SC_VPORT_ZMAX_10
+ 0x00000000, // PA_SC_VPORT_ZMIN_11
+ 0x3f800000, // PA_SC_VPORT_ZMAX_11
+ 0x00000000, // PA_SC_VPORT_ZMIN_12
+ 0x3f800000, // PA_SC_VPORT_ZMAX_12
+ 0x00000000, // PA_SC_VPORT_ZMIN_13
+ 0x3f800000, // PA_SC_VPORT_ZMAX_13
+ 0x00000000, // PA_SC_VPORT_ZMIN_14
+ 0x3f800000, // PA_SC_VPORT_ZMAX_14
+ 0x00000000, // PA_SC_VPORT_ZMIN_15
+ 0x3f800000, // PA_SC_VPORT_ZMAX_15
+ 0x00000000, // PA_SC_RASTER_CONFIG
+ 0x00000000, // PA_SC_RASTER_CONFIG_1
+ 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_2[] = {
+ 0x00000000, // CP_PERFMON_CNTX_CNTL
+ 0x00000000, // CP_RINGID
+ 0x00000000, // CP_VMID
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SC_RIGHT_VERT_GRID
+ 0x00000000, // PA_SC_LEFT_VERT_GRID
+ 0x00000000, // PA_SC_HORIZ_GRID
+ 0x00000000, // HOLE
+ 0x00000000, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0xffffffff, // VGT_MAX_VTX_INDX
+ 0x00000000, // VGT_MIN_VTX_INDX
+ 0x00000000, // VGT_INDX_OFFSET
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x00550055, // CB_RMI_GL2_CACHE_CONTROL
+ 0x00000000, // CB_BLEND_RED
+ 0x00000000, // CB_BLEND_GREEN
+ 0x00000000, // CB_BLEND_BLUE
+ 0x00000000, // CB_BLEND_ALPHA
+ 0x00000000, // CB_DCC_CONTROL
+ 0x00000000, // CB_COVERAGE_OUT_CONTROL
+ 0x00000000, // DB_STENCIL_CONTROL
+ 0x01000000, // DB_STENCILREFMASK
+ 0x01000000, // DB_STENCILREFMASK_BF
+ 0, // HOLE
+ 0x00000000, // PA_CL_VPORT_XSCALE
+ 0x00000000, // PA_CL_VPORT_XOFFSET
+ 0x00000000, // PA_CL_VPORT_YSCALE
+ 0x00000000, // PA_CL_VPORT_YOFFSET
+ 0x00000000, // PA_CL_VPORT_ZSCALE
+ 0x00000000, // PA_CL_VPORT_ZOFFSET
+ 0x00000000, // PA_CL_VPORT_XSCALE_1
+ 0x00000000, // PA_CL_VPORT_XOFFSET_1
+ 0x00000000, // PA_CL_VPORT_YSCALE_1
+ 0x00000000, // PA_CL_VPORT_YOFFSET_1
+ 0x00000000, // PA_CL_VPORT_ZSCALE_1
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_1
+ 0x00000000, // PA_CL_VPORT_XSCALE_2
+ 0x00000000, // PA_CL_VPORT_XOFFSET_2
+ 0x00000000, // PA_CL_VPORT_YSCALE_2
+ 0x00000000, // PA_CL_VPORT_YOFFSET_2
+ 0x00000000, // PA_CL_VPORT_ZSCALE_2
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_2
+ 0x00000000, // PA_CL_VPORT_XSCALE_3
+ 0x00000000, // PA_CL_VPORT_XOFFSET_3
+ 0x00000000, // PA_CL_VPORT_YSCALE_3
+ 0x00000000, // PA_CL_VPORT_YOFFSET_3
+ 0x00000000, // PA_CL_VPORT_ZSCALE_3
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_3
+ 0x00000000, // PA_CL_VPORT_XSCALE_4
+ 0x00000000, // PA_CL_VPORT_XOFFSET_4
+ 0x00000000, // PA_CL_VPORT_YSCALE_4
+ 0x00000000, // PA_CL_VPORT_YOFFSET_4
+ 0x00000000, // PA_CL_VPORT_ZSCALE_4
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_4
+ 0x00000000, // PA_CL_VPORT_XSCALE_5
+ 0x00000000, // PA_CL_VPORT_XOFFSET_5
+ 0x00000000, // PA_CL_VPORT_YSCALE_5
+ 0x00000000, // PA_CL_VPORT_YOFFSET_5
+ 0x00000000, // PA_CL_VPORT_ZSCALE_5
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_5
+ 0x00000000, // PA_CL_VPORT_XSCALE_6
+ 0x00000000, // PA_CL_VPORT_XOFFSET_6
+ 0x00000000, // PA_CL_VPORT_YSCALE_6
+ 0x00000000, // PA_CL_VPORT_YOFFSET_6
+ 0x00000000, // PA_CL_VPORT_ZSCALE_6
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_6
+ 0x00000000, // PA_CL_VPORT_XSCALE_7
+ 0x00000000, // PA_CL_VPORT_XOFFSET_7
+ 0x00000000, // PA_CL_VPORT_YSCALE_7
+ 0x00000000, // PA_CL_VPORT_YOFFSET_7
+ 0x00000000, // PA_CL_VPORT_ZSCALE_7
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_7
+ 0x00000000, // PA_CL_VPORT_XSCALE_8
+ 0x00000000, // PA_CL_VPORT_XOFFSET_8
+ 0x00000000, // PA_CL_VPORT_YSCALE_8
+ 0x00000000, // PA_CL_VPORT_YOFFSET_8
+ 0x00000000, // PA_CL_VPORT_ZSCALE_8
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_8
+ 0x00000000, // PA_CL_VPORT_XSCALE_9
+ 0x00000000, // PA_CL_VPORT_XOFFSET_9
+ 0x00000000, // PA_CL_VPORT_YSCALE_9
+ 0x00000000, // PA_CL_VPORT_YOFFSET_9
+ 0x00000000, // PA_CL_VPORT_ZSCALE_9
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_9
+ 0x00000000, // PA_CL_VPORT_XSCALE_10
+ 0x00000000, // PA_CL_VPORT_XOFFSET_10
+ 0x00000000, // PA_CL_VPORT_YSCALE_10
+ 0x00000000, // PA_CL_VPORT_YOFFSET_10
+ 0x00000000, // PA_CL_VPORT_ZSCALE_10
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_10
+ 0x00000000, // PA_CL_VPORT_XSCALE_11
+ 0x00000000, // PA_CL_VPORT_XOFFSET_11
+ 0x00000000, // PA_CL_VPORT_YSCALE_11
+ 0x00000000, // PA_CL_VPORT_YOFFSET_11
+ 0x00000000, // PA_CL_VPORT_ZSCALE_11
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_11
+ 0x00000000, // PA_CL_VPORT_XSCALE_12
+ 0x00000000, // PA_CL_VPORT_XOFFSET_12
+ 0x00000000, // PA_CL_VPORT_YSCALE_12
+ 0x00000000, // PA_CL_VPORT_YOFFSET_12
+ 0x00000000, // PA_CL_VPORT_ZSCALE_12
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_12
+ 0x00000000, // PA_CL_VPORT_XSCALE_13
+ 0x00000000, // PA_CL_VPORT_XOFFSET_13
+ 0x00000000, // PA_CL_VPORT_YSCALE_13
+ 0x00000000, // PA_CL_VPORT_YOFFSET_13
+ 0x00000000, // PA_CL_VPORT_ZSCALE_13
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_13
+ 0x00000000, // PA_CL_VPORT_XSCALE_14
+ 0x00000000, // PA_CL_VPORT_XOFFSET_14
+ 0x00000000, // PA_CL_VPORT_YSCALE_14
+ 0x00000000, // PA_CL_VPORT_YOFFSET_14
+ 0x00000000, // PA_CL_VPORT_ZSCALE_14
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_14
+ 0x00000000, // PA_CL_VPORT_XSCALE_15
+ 0x00000000, // PA_CL_VPORT_XOFFSET_15
+ 0x00000000, // PA_CL_VPORT_YSCALE_15
+ 0x00000000, // PA_CL_VPORT_YOFFSET_15
+ 0x00000000, // PA_CL_VPORT_ZSCALE_15
+ 0x00000000, // PA_CL_VPORT_ZOFFSET_15
+ 0x00000000, // PA_CL_UCP_0_X
+ 0x00000000, // PA_CL_UCP_0_Y
+ 0x00000000, // PA_CL_UCP_0_Z
+ 0x00000000, // PA_CL_UCP_0_W
+ 0x00000000, // PA_CL_UCP_1_X
+ 0x00000000, // PA_CL_UCP_1_Y
+ 0x00000000, // PA_CL_UCP_1_Z
+ 0x00000000, // PA_CL_UCP_1_W
+ 0x00000000, // PA_CL_UCP_2_X
+ 0x00000000, // PA_CL_UCP_2_Y
+ 0x00000000, // PA_CL_UCP_2_Z
+ 0x00000000, // PA_CL_UCP_2_W
+ 0x00000000, // PA_CL_UCP_3_X
+ 0x00000000, // PA_CL_UCP_3_Y
+ 0x00000000, // PA_CL_UCP_3_Z
+ 0x00000000, // PA_CL_UCP_3_W
+ 0x00000000, // PA_CL_UCP_4_X
+ 0x00000000, // PA_CL_UCP_4_Y
+ 0x00000000, // PA_CL_UCP_4_Z
+ 0x00000000, // PA_CL_UCP_4_W
+ 0x00000000, // PA_CL_UCP_5_X
+ 0x00000000, // PA_CL_UCP_5_Y
+ 0x00000000, // PA_CL_UCP_5_Z
+ 0x00000000, // PA_CL_UCP_5_W
+ 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_CNTL_0
+ 0x00000000, // SPI_PS_INPUT_CNTL_1
+ 0x00000000, // SPI_PS_INPUT_CNTL_2
+ 0x00000000, // SPI_PS_INPUT_CNTL_3
+ 0x00000000, // SPI_PS_INPUT_CNTL_4
+ 0x00000000, // SPI_PS_INPUT_CNTL_5
+ 0x00000000, // SPI_PS_INPUT_CNTL_6
+ 0x00000000, // SPI_PS_INPUT_CNTL_7
+ 0x00000000, // SPI_PS_INPUT_CNTL_8
+ 0x00000000, // SPI_PS_INPUT_CNTL_9
+ 0x00000000, // SPI_PS_INPUT_CNTL_10
+ 0x00000000, // SPI_PS_INPUT_CNTL_11
+ 0x00000000, // SPI_PS_INPUT_CNTL_12
+ 0x00000000, // SPI_PS_INPUT_CNTL_13
+ 0x00000000, // SPI_PS_INPUT_CNTL_14
+ 0x00000000, // SPI_PS_INPUT_CNTL_15
+ 0x00000000, // SPI_PS_INPUT_CNTL_16
+ 0x00000000, // SPI_PS_INPUT_CNTL_17
+ 0x00000000, // SPI_PS_INPUT_CNTL_18
+ 0x00000000, // SPI_PS_INPUT_CNTL_19
+ 0x00000000, // SPI_PS_INPUT_CNTL_20
+ 0x00000000, // SPI_PS_INPUT_CNTL_21
+ 0x00000000, // SPI_PS_INPUT_CNTL_22
+ 0x00000000, // SPI_PS_INPUT_CNTL_23
+ 0x00000000, // SPI_PS_INPUT_CNTL_24
+ 0x00000000, // SPI_PS_INPUT_CNTL_25
+ 0x00000000, // SPI_PS_INPUT_CNTL_26
+ 0x00000000, // SPI_PS_INPUT_CNTL_27
+ 0x00000000, // SPI_PS_INPUT_CNTL_28
+ 0x00000000, // SPI_PS_INPUT_CNTL_29
+ 0x00000000, // SPI_PS_INPUT_CNTL_30
+ 0x00000000, // SPI_PS_INPUT_CNTL_31
+ 0x00000000, // SPI_VS_OUT_CONFIG
+ 0, // HOLE
+ 0x00000000, // SPI_PS_INPUT_ENA
+ 0x00000000, // SPI_PS_INPUT_ADDR
+ 0x00000000, // SPI_INTERP_CONTROL_0
+ 0x00000002, // SPI_PS_IN_CONTROL
+ 0, // HOLE
+ 0x00000000, // SPI_BARYC_CNTL
+ 0, // HOLE
+ 0x00000000, // SPI_TMPRING_SIZE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SPI_SHADER_IDX_FORMAT
+ 0x00000000, // SPI_SHADER_POS_FORMAT
+ 0x00000000, // SPI_SHADER_Z_FORMAT
+ 0x00000000, // SPI_SHADER_COL_FORMAT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // SX_PS_DOWNCONVERT
+ 0x00000000, // SX_BLEND_OPT_EPSILON
+ 0x00000000, // SX_BLEND_OPT_CONTROL
+ 0x00000000, // SX_MRT0_BLEND_OPT
+ 0x00000000, // SX_MRT1_BLEND_OPT
+ 0x00000000, // SX_MRT2_BLEND_OPT
+ 0x00000000, // SX_MRT3_BLEND_OPT
+ 0x00000000, // SX_MRT4_BLEND_OPT
+ 0x00000000, // SX_MRT5_BLEND_OPT
+ 0x00000000, // SX_MRT6_BLEND_OPT
+ 0x00000000, // SX_MRT7_BLEND_OPT
+ 0x00000000, // CB_BLEND0_CONTROL
+ 0x00000000, // CB_BLEND1_CONTROL
+ 0x00000000, // CB_BLEND2_CONTROL
+ 0x00000000, // CB_BLEND3_CONTROL
+ 0x00000000, // CB_BLEND4_CONTROL
+ 0x00000000, // CB_BLEND5_CONTROL
+ 0x00000000, // CB_BLEND6_CONTROL
+ 0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_3[] = {
+ 0x00000000, // PA_CL_POINT_X_RAD
+ 0x00000000, // PA_CL_POINT_Y_RAD
+ 0x00000000, // PA_CL_POINT_SIZE
+ 0x00000000, // PA_CL_POINT_CULL_RAD
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_4[] = {
+ 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x00000000, // DB_DEPTH_CONTROL
+ 0x00000000, // DB_EQAA
+ 0x00000000, // CB_COLOR_CONTROL
+ 0x00000000, // DB_SHADER_CONTROL
+ 0x00090000, // PA_CL_CLIP_CNTL
+ 0x00000004, // PA_SU_SC_MODE_CNTL
+ 0x00000000, // PA_CL_VTE_CNTL
+ 0x00000000, // PA_CL_VS_OUT_CNTL
+ 0x00000000, // PA_CL_NANINF_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+ 0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+ 0x00000000, // PA_SU_PRIM_FILTER_CNTL
+ 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x00000000, // PA_CL_OBJPRIM_ID_CNTL
+ 0x00000000, // PA_CL_NGG_CNTL
+ 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // PA_SU_POINT_SIZE
+ 0x00000000, // PA_SU_POINT_MINMAX
+ 0x00000000, // PA_SU_LINE_CNTL
+ 0x00000000, // PA_SC_LINE_STIPPLE
+ 0x00000000, // VGT_OUTPUT_PATH_CNTL
+ 0x00000000, // VGT_HOS_CNTL
+ 0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+ 0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+ 0x00000000, // VGT_HOS_REUSE_DEPTH
+ 0x00000000, // VGT_GROUP_PRIM_TYPE
+ 0x00000000, // VGT_GROUP_FIRST_DECR
+ 0x00000000, // VGT_GROUP_DECR
+ 0x00000000, // VGT_GROUP_VECT_0_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_CNTL
+ 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+ 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+ 0x00000000, // VGT_GS_MODE
+ 0x00000000, // VGT_GS_ONCHIP_CNTL
+ 0x00000000, // PA_SC_MODE_CNTL_0
+ 0x00000000, // PA_SC_MODE_CNTL_1
+ 0x00000000, // VGT_ENHANCE
+ 0x00000100, // VGT_GS_PER_ES
+ 0x00000080, // VGT_ES_PER_GS
+ 0x00000002, // VGT_GS_PER_VS
+ 0x00000000, // VGT_GSVS_RING_OFFSET_1
+ 0x00000000, // VGT_GSVS_RING_OFFSET_2
+ 0x00000000, // VGT_GSVS_RING_OFFSET_3
+ 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0x00000000, // IA_ENHANCE
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_5[] = {
+ 0x00000000, // WD_ENHANCE
+ 0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_6[] = {
+ 0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_7[] = {
+ 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+ 0x00000000, // VGT_DRAW_PAYLOAD_CNTL
+ 0x00000000, // HOLE
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_0
+ 0x00000000, // VGT_INSTANCE_STEP_RATE_1
+ 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0x00000000, // VGT_ESGS_RING_ITEMSIZE
+ 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0x00000000, // VGT_REUSE_OFF
+ 0x00000000, // VGT_VTX_CNT_EN
+ 0x00000000, // DB_HTILE_SURFACE
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE0
+ 0x00000000, // DB_SRESULTS_COMPARE_STATE1
+ 0x00000000, // DB_PRELOAD_CONTROL
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0, // HOLE
+ 0x00000000, // VGT_GS_MAX_VERT_OUT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0x00000000, // VGT_TESS_DISTRIBUTION
+ 0x00000000, // VGT_SHADER_STAGES_EN
+ 0x00000000, // VGT_LS_HS_CONFIG
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+ 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0x00000000, // VGT_TF_PARAM
+ 0x00000000, // DB_ALPHA_TO_MASK
+ 0x00000000, // VGT_DISPATCH_DRAW_INDEX
+ 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x00000000, // VGT_GS_INSTANCE_CNT
+ 0x00000000, // VGT_STRMOUT_CONFIG
+ 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+};
+static const unsigned int gfx10_SECT_CONTEXT_def_8[] = {
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_0
+ 0x00000000, // PA_SC_CENTROID_PRIORITY_1
+ 0x00001000, // PA_SC_LINE_CNTL
+ 0x00000000, // PA_SC_AA_CONFIG
+ 0x00000005, // PA_SU_VTX_CNTL
+ 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+ 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+ 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+ 0x00000000, // PA_SC_SHADER_CONTROL
+ 0x00000003, // PA_SC_BINNER_CNTL_0
+ 0x00000000, // PA_SC_BINNER_CNTL_1
+ 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x00000000, // PA_SC_NGG_MODE_CNTL
+ 0, // HOLE
+ 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0x00000000, // CB_COLOR0_BASE
+ 0x00000000, // CB_COLOR0_PITCH
+ 0x00000000, // CB_COLOR0_SLICE
+ 0x00000000, // CB_COLOR0_VIEW
+ 0x00000000, // CB_COLOR0_INFO
+ 0x00000000, // CB_COLOR0_ATTRIB
+ 0x00000000, // CB_COLOR0_DCC_CONTROL
+ 0x00000000, // CB_COLOR0_CMASK
+ 0x00000000, // CB_COLOR0_CMASK_SLICE
+ 0x00000000, // CB_COLOR0_FMASK
+ 0x00000000, // CB_COLOR0_FMASK_SLICE
+ 0x00000000, // CB_COLOR0_CLEAR_WORD0
+ 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR1_BASE
+ 0x00000000, // CB_COLOR1_PITCH
+ 0x00000000, // CB_COLOR1_SLICE
+ 0x00000000, // CB_COLOR1_VIEW
+ 0x00000000, // CB_COLOR1_INFO
+ 0x00000000, // CB_COLOR1_ATTRIB
+ 0x00000000, // CB_COLOR1_DCC_CONTROL
+ 0x00000000, // CB_COLOR1_CMASK
+ 0x00000000, // CB_COLOR1_CMASK_SLICE
+ 0x00000000, // CB_COLOR1_FMASK
+ 0x00000000, // CB_COLOR1_FMASK_SLICE
+ 0x00000000, // CB_COLOR1_CLEAR_WORD0
+ 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR2_BASE
+ 0x00000000, // CB_COLOR2_PITCH
+ 0x00000000, // CB_COLOR2_SLICE
+ 0x00000000, // CB_COLOR2_VIEW
+ 0x00000000, // CB_COLOR2_INFO
+ 0x00000000, // CB_COLOR2_ATTRIB
+ 0x00000000, // CB_COLOR2_DCC_CONTROL
+ 0x00000000, // CB_COLOR2_CMASK
+ 0x00000000, // CB_COLOR2_CMASK_SLICE
+ 0x00000000, // CB_COLOR2_FMASK
+ 0x00000000, // CB_COLOR2_FMASK_SLICE
+ 0x00000000, // CB_COLOR2_CLEAR_WORD0
+ 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR3_BASE
+ 0x00000000, // CB_COLOR3_PITCH
+ 0x00000000, // CB_COLOR3_SLICE
+ 0x00000000, // CB_COLOR3_VIEW
+ 0x00000000, // CB_COLOR3_INFO
+ 0x00000000, // CB_COLOR3_ATTRIB
+ 0x00000000, // CB_COLOR3_DCC_CONTROL
+ 0x00000000, // CB_COLOR3_CMASK
+ 0x00000000, // CB_COLOR3_CMASK_SLICE
+ 0x00000000, // CB_COLOR3_FMASK
+ 0x00000000, // CB_COLOR3_FMASK_SLICE
+ 0x00000000, // CB_COLOR3_CLEAR_WORD0
+ 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR4_BASE
+ 0x00000000, // CB_COLOR4_PITCH
+ 0x00000000, // CB_COLOR4_SLICE
+ 0x00000000, // CB_COLOR4_VIEW
+ 0x00000000, // CB_COLOR4_INFO
+ 0x00000000, // CB_COLOR4_ATTRIB
+ 0x00000000, // CB_COLOR4_DCC_CONTROL
+ 0x00000000, // CB_COLOR4_CMASK
+ 0x00000000, // CB_COLOR4_CMASK_SLICE
+ 0x00000000, // CB_COLOR4_FMASK
+ 0x00000000, // CB_COLOR4_FMASK_SLICE
+ 0x00000000, // CB_COLOR4_CLEAR_WORD0
+ 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR5_BASE
+ 0x00000000, // CB_COLOR5_PITCH
+ 0x00000000, // CB_COLOR5_SLICE
+ 0x00000000, // CB_COLOR5_VIEW
+ 0x00000000, // CB_COLOR5_INFO
+ 0x00000000, // CB_COLOR5_ATTRIB
+ 0x00000000, // CB_COLOR5_DCC_CONTROL
+ 0x00000000, // CB_COLOR5_CMASK
+ 0x00000000, // CB_COLOR5_CMASK_SLICE
+ 0x00000000, // CB_COLOR5_FMASK
+ 0x00000000, // CB_COLOR5_FMASK_SLICE
+ 0x00000000, // CB_COLOR5_CLEAR_WORD0
+ 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR6_BASE
+ 0x00000000, // CB_COLOR6_PITCH
+ 0x00000000, // CB_COLOR6_SLICE
+ 0x00000000, // CB_COLOR6_VIEW
+ 0x00000000, // CB_COLOR6_INFO
+ 0x00000000, // CB_COLOR6_ATTRIB
+ 0x00000000, // CB_COLOR6_DCC_CONTROL
+ 0x00000000, // CB_COLOR6_CMASK
+ 0x00000000, // CB_COLOR6_CMASK_SLICE
+ 0x00000000, // CB_COLOR6_FMASK
+ 0x00000000, // CB_COLOR6_FMASK_SLICE
+ 0x00000000, // CB_COLOR6_CLEAR_WORD0
+ 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR7_BASE
+ 0x00000000, // CB_COLOR7_PITCH
+ 0x00000000, // CB_COLOR7_SLICE
+ 0x00000000, // CB_COLOR7_VIEW
+ 0x00000000, // CB_COLOR7_INFO
+ 0x00000000, // CB_COLOR7_ATTRIB
+ 0x00000000, // CB_COLOR7_DCC_CONTROL
+ 0x00000000, // CB_COLOR7_CMASK
+ 0x00000000, // CB_COLOR7_CMASK_SLICE
+ 0x00000000, // CB_COLOR7_FMASK
+ 0x00000000, // CB_COLOR7_FMASK_SLICE
+ 0x00000000, // CB_COLOR7_CLEAR_WORD0
+ 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_DCC_BASE
+ 0, // HOLE
+ 0x00000000, // CB_COLOR0_BASE_EXT
+ 0x00000000, // CB_COLOR1_BASE_EXT
+ 0x00000000, // CB_COLOR2_BASE_EXT
+ 0x00000000, // CB_COLOR3_BASE_EXT
+ 0x00000000, // CB_COLOR4_BASE_EXT
+ 0x00000000, // CB_COLOR5_BASE_EXT
+ 0x00000000, // CB_COLOR6_BASE_EXT
+ 0x00000000, // CB_COLOR7_BASE_EXT
+ 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0x00000000, // CB_COLOR0_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR1_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR2_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR3_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR4_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR5_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR6_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR7_DCC_BASE_EXT
+ 0x00000000, // CB_COLOR0_ATTRIB2
+ 0x00000000, // CB_COLOR1_ATTRIB2
+ 0x00000000, // CB_COLOR2_ATTRIB2
+ 0x00000000, // CB_COLOR3_ATTRIB2
+ 0x00000000, // CB_COLOR4_ATTRIB2
+ 0x00000000, // CB_COLOR5_ATTRIB2
+ 0x00000000, // CB_COLOR6_ATTRIB2
+ 0x00000000, // CB_COLOR7_ATTRIB2
+ 0x00000000, // CB_COLOR0_ATTRIB3
+ 0x00000000, // CB_COLOR1_ATTRIB3
+ 0x00000000, // CB_COLOR2_ATTRIB3
+ 0x00000000, // CB_COLOR3_ATTRIB3
+ 0x00000000, // CB_COLOR4_ATTRIB3
+ 0x00000000, // CB_COLOR5_ATTRIB3
+ 0x00000000, // CB_COLOR6_ATTRIB3
+ 0x00000000, // CB_COLOR7_ATTRIB3
+};
+static const struct cs_extent_def gfx10_SECT_CONTEXT_defs[] = {
+ {gfx10_SECT_CONTEXT_def_1, 0x0000a000, 215 },
+ {gfx10_SECT_CONTEXT_def_2, 0x0000a0d8, 272 },
+ {gfx10_SECT_CONTEXT_def_3, 0x0000a1f5, 4 },
+ {gfx10_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
+ {gfx10_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+ {gfx10_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+ {gfx10_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
+ {gfx10_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
+ { 0, 0, 0 }
+};
+static const struct cs_section_def gfx10_cs_data[] = {
+ { gfx10_SECT_CONTEXT_defs, SECT_CONTEXT },
+ { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 61024b9c7a4b..1dca0cabc326 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 1f0426d2fc2a..1ffd1963e765 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -20,7 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 2280b971d758..9e0782b54066 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -20,7 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index bea32f076b91..4bf453e07dca 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -20,7 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 13da915991dd..b23418ca8f6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -20,7 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index e4cc1d48eaab..3cc0a16649f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <drm/drm_vblank.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_i2c.h"
@@ -455,6 +457,7 @@ static int dce_virtual_hw_init(void *handle)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
+ case CHIP_NAVI10:
break;
default:
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 9935371db7ce..844c03868248 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -29,7 +29,7 @@
static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
-static void df_v1_7_init (struct amdgpu_device *adev)
+static void df_v1_7_sw_init(struct amdgpu_device *adev)
{
}
@@ -110,7 +110,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
}
const struct amdgpu_df_funcs df_v1_7_funcs = {
- .init = df_v1_7_init,
+ .sw_init = df_v1_7_sw_init,
.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
.get_fb_channel_number = df_v1_7_get_fb_channel_number,
.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index d5ebe566809b..ef6e91f9f51c 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -30,8 +30,104 @@
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
16, 32, 0, 0, 0, 2, 4, 8};
-static void df_v3_6_init(struct amdgpu_device *adev)
+/* init df format attrs */
+AMDGPU_PMU_ATTR(event, "config:0-7");
+AMDGPU_PMU_ATTR(instance, "config:8-15");
+AMDGPU_PMU_ATTR(umask, "config:16-23");
+
+/* df format attributes */
+static struct attribute *df_v3_6_format_attrs[] = {
+ &pmu_attr_event.attr,
+ &pmu_attr_instance.attr,
+ &pmu_attr_umask.attr,
+ NULL
+};
+
+/* df format attribute group */
+static struct attribute_group df_v3_6_format_attr_group = {
+ .name = "format",
+ .attrs = df_v3_6_format_attrs,
+};
+
+/* df event attrs */
+AMDGPU_PMU_ATTR(cake0_pcsout_txdata,
+ "event=0x7,instance=0x46,umask=0x2");
+AMDGPU_PMU_ATTR(cake1_pcsout_txdata,
+ "event=0x7,instance=0x47,umask=0x2");
+AMDGPU_PMU_ATTR(cake0_pcsout_txmeta,
+ "event=0x7,instance=0x46,umask=0x4");
+AMDGPU_PMU_ATTR(cake1_pcsout_txmeta,
+ "event=0x7,instance=0x47,umask=0x4");
+AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc,
+ "event=0xb,instance=0x46,umask=0x4");
+AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc,
+ "event=0xb,instance=0x47,umask=0x4");
+AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc,
+ "event=0xb,instance=0x46,umask=0x8");
+AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc,
+ "event=0xb,instance=0x47,umask=0x8");
+
+/* df event attributes */
+static struct attribute *df_v3_6_event_attrs[] = {
+ &pmu_attr_cake0_pcsout_txdata.attr,
+ &pmu_attr_cake1_pcsout_txdata.attr,
+ &pmu_attr_cake0_pcsout_txmeta.attr,
+ &pmu_attr_cake1_pcsout_txmeta.attr,
+ &pmu_attr_cake0_ftiinstat_reqalloc.attr,
+ &pmu_attr_cake1_ftiinstat_reqalloc.attr,
+ &pmu_attr_cake0_ftiinstat_rspalloc.attr,
+ &pmu_attr_cake1_ftiinstat_rspalloc.attr,
+ NULL
+};
+
+/* df event attribute group */
+static struct attribute_group df_v3_6_event_attr_group = {
+ .name = "events",
+ .attrs = df_v3_6_event_attrs
+};
+
+/* df event attr groups */
+const struct attribute_group *df_v3_6_attr_groups[] = {
+ &df_v3_6_format_attr_group,
+ &df_v3_6_event_attr_group,
+ NULL
+};
+
+/* get the number of df counters available */
+static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
+ struct amdgpu_device *adev;
+ struct drm_device *ddev;
+ int i, count;
+
+ ddev = dev_get_drvdata(dev);
+ adev = ddev->dev_private;
+ count = 0;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0)
+ count++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%i\n", count);
+}
+
+/* device attr for available perfmon counters */
+static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
+
+/* init perfmons */
+static void df_v3_6_sw_init(struct amdgpu_device *adev)
+{
+ int i, ret;
+
+ ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
+ if (ret)
+ DRM_ERROR("failed to create file for available df counters\n");
+
+ for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
+ adev->df_perfmon_config_assign_mask[i] = 0;
}
static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -105,12 +201,303 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
*flags |= AMD_CG_SUPPORT_DF_MGCG;
}
+/* get assigned df perfmon ctr as int */
+static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int i;
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if ((config & 0x0FFFFFFUL) ==
+ adev->df_perfmon_config_assign_mask[i])
+ return i;
+ }
+
+ return -EINVAL;
+}
+
+/* get address based on counter assignment */
+static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
+ uint64_t config,
+ int is_ctrl,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ if (target_cntr < 0)
+ return;
+
+ switch (target_cntr) {
+
+ case 0:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+ break;
+ case 1:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+ break;
+ case 2:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+ break;
+ case 3:
+ *lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
+ *hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+ break;
+
+ }
+
+}
+
+/* get read counter address */
+static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr)
+{
+ df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
+}
+
+/* get control counter settings i.e. address and values to set */
+static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+ uint64_t config,
+ uint32_t *lo_base_addr,
+ uint32_t *hi_base_addr,
+ uint32_t *lo_val,
+ uint32_t *hi_val)
+{
+ df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
+
+ if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+ DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
+ *lo_base_addr, *hi_base_addr);
+ return -ENXIO;
+ }
+
+ if (lo_val && hi_val) {
+ uint32_t eventsel, instance, unitmask;
+ uint32_t instance_10, instance_5432, instance_76;
+
+ eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
+ unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
+ instance = DF_V3_6_GET_INSTANCE(config);
+
+ instance_10 = instance & 0x3;
+ instance_5432 = (instance >> 2) & 0xf;
+ instance_76 = (instance >> 6) & 0x3;
+
+ *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
+ *hi_val = (instance_76 << 29) | instance_5432;
+ }
+
+ return 0;
+}
+
+/* assign df performance counters for read */
+static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
+ uint64_t config,
+ int *is_assigned)
+{
+ int i, target_cntr;
+
+ *is_assigned = 0;
+
+ target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ if (target_cntr >= 0) {
+ *is_assigned = 1;
+ return 0;
+ }
+
+ for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
+ if (adev->df_perfmon_config_assign_mask[i] == 0U) {
+ adev->df_perfmon_config_assign_mask[i] =
+ config & 0x0FFFFFFUL;
+ return 0;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+/* release performance counter */
+static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
+
+ if (target_cntr >= 0)
+ adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
+}
+
+
+static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ uint32_t lo_base_addr, hi_base_addr;
+
+ df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ WREG32_PCIE(lo_base_addr, 0UL);
+ WREG32_PCIE(hi_base_addr, 0UL);
+}
+
+
+static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
+ uint64_t config)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ int ret, is_assigned;
+
+ ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
+
+ if (ret || is_assigned)
+ return ret;
+
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ &lo_val,
+ &hi_val);
+
+ if (ret)
+ return ret;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+
+ WREG32_PCIE(lo_base_addr, lo_val);
+ WREG32_PCIE(hi_base_addr, hi_val);
+
+ return ret;
+}
+
+static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+ int is_enable)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+
+ df_v3_6_reset_perfmon_cntr(adev, config);
+
+ if (is_enable) {
+ ret = df_v3_6_add_perfmon_cntr(adev, config);
+ } else {
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ NULL,
+ NULL);
+
+ if (ret)
+ return ret;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
+ config, lo_base_addr, hi_base_addr, lo_val);
+
+ WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+ int is_disable)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val;
+ int ret = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ ret = df_v3_6_pmc_get_ctrl_settings(adev,
+ config,
+ &lo_base_addr,
+ &hi_base_addr,
+ NULL,
+ NULL);
+
+ if (ret)
+ return ret;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
+ config, lo_base_addr, hi_base_addr, lo_val);
+
+ WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
+
+ if (is_disable)
+ df_v3_6_pmc_release_cntr(adev, config);
+
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+ uint64_t config,
+ uint64_t *count)
+{
+ uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+ *count = 0;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+
+ df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+ &hi_base_addr);
+
+ if ((lo_base_addr == 0) || (hi_base_addr == 0))
+ return;
+
+ lo_val = RREG32_PCIE(lo_base_addr);
+ hi_val = RREG32_PCIE(hi_base_addr);
+
+ *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+
+ if (*count >= DF_V3_6_PERFMON_OVERFLOW)
+ *count = 0;
+
+ DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
+ config, lo_base_addr, hi_base_addr, lo_val, hi_val);
+
+ break;
+
+ default:
+ break;
+ }
+}
+
const struct amdgpu_df_funcs df_v3_6_funcs = {
- .init = df_v3_6_init,
+ .sw_init = df_v3_6_sw_init,
.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
.get_fb_channel_number = df_v3_6_get_fb_channel_number,
.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
.update_medium_grain_clock_gating =
df_v3_6_update_medium_grain_clock_gating,
.get_clockgating_state = df_v3_6_get_clockgating_state,
+ .pmc_start = df_v3_6_pmc_start,
+ .pmc_stop = df_v3_6_pmc_stop,
+ .pmc_get_count = df_v3_6_pmc_get_count
};
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
index e79c58e5efcb..76998541bc30 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -35,6 +35,16 @@ enum DF_V3_6_MGCG {
DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
};
+/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
+#define DF_V3_6_MAX_COUNTERS 4
+
+/* get flags from df perfmon config */
+#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL)
+#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL)
+#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL)
+#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL
+
+extern const struct attribute_group *df_v3_6_attr_groups[];
extern const struct amdgpu_df_funcs df_v3_6_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
new file mode 100644
index 000000000000..ee41d5592c51
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -0,0 +1,5216 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "nv.h"
+#include "nvd.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "navi10_enum.h"
+#include "hdp/hdp_5_0_0_offset.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "clearstate_gfx10.h"
+#include "v10_structs.h"
+#include "gfx_v10_0.h"
+#include "nbio_v2_3.h"
+
+/**
+ * Navi10 has two graphic rings to share each graphic pipe.
+ * 1. Primary ring
+ * 2. Async ring
+ *
+ * In bring-up phase, it just used primary ring so set gfx ring number as 1 at
+ * first.
+ */
+#define GFX10_NUM_GFX_RINGS 2
+#define GFX10_MEC_HPD_SIZE 2048
+
+#define F32_CE_PROGRAM_RAM_SIZE 65536
+#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+
+MODULE_FIRMWARE("amdgpu/navi10_ce.bin");
+MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");
+MODULE_FIRMWARE("amdgpu/navi10_me.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec.bin");
+MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");
+MODULE_FIRMWARE("amdgpu/navi10_rlc.bin");
+
+static const struct soc15_reg_golden golden_settings_gc_10_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =
+{
+ /* Pending on emulation bring up */
+};
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance);
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev);
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
+static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+
+static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
+{
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+ amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
+ amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
+ amdgpu_ring_write(kiq_ring, 0); /* oac mask */
+ amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = kiq_ring->adev;
+ uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+ /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+ PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+ PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+ PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+ PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
+ PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+ PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_ACTION(action) |
+ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+ PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+ if (action == PREEMPT_QUEUES_NO_UNMAP) {
+ amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(kiq_ring, seq);
+ } else {
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ }
+}
+
+static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
+ struct amdgpu_ring *ring,
+ u64 addr,
+ u64 seq)
+{
+ uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+ amdgpu_ring_write(kiq_ring,
+ PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+ PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+ PACKET3_QUERY_STATUS_COMMAND(2));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+ amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+ amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
+ .kiq_set_resources = gfx10_kiq_set_resources,
+ .kiq_map_queues = gfx10_kiq_map_queues,
+ .kiq_unmap_queues = gfx10_kiq_unmap_queues,
+ .kiq_query_status = gfx10_kiq_query_status,
+ .set_resources_size = 8,
+ .map_queues_size = 7,
+ .unmap_queues_size = 6,
+ .query_status_size = 7,
+};
+
+static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
+}
+
+static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_1,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_10_0_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_scratch_init(struct amdgpu_device *adev)
+{
+ adev->gfx.scratch.num_reg = 8;
+ adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
+ adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
+}
+
+static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+ bool wc, uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+ int mem_space, int opt, uint32_t addr0,
+ uint32_t addr1, uint32_t ref, uint32_t mask,
+ uint32_t inv)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring,
+ /* memory (1) or register (0) */
+ (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+ WAIT_REG_MEM_OPERATION(opt) | /* wait */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(eng_sel)));
+
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+ amdgpu_ring_write(ring, addr0);
+ amdgpu_ring_write(ring, addr1);
+ amdgpu_ring_write(ring, ref);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ r = amdgpu_gfx_scratch_get(adev, &scratch);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
+ return r;
+ }
+
+ WREG32(scratch, 0xCAFEDEAD);
+
+ r = amdgpu_ring_alloc(ring, 3);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ amdgpu_gfx_scratch_free(adev, scratch);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+ amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ DRM_UDELAY(1);
+ }
+ if (i < adev->usec_timeout) {
+ if (amdgpu_emu_mode == 1)
+ DRM_INFO("ring test on %d succeeded in %d msecs\n",
+ ring->idx, i);
+ else
+ DRM_INFO("ring test on %d succeeded in %d usecs\n",
+ ring->idx, i);
+ } else {
+ DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
+ ring->idx, scratch, tmp);
+ r = -EINVAL;
+ }
+ amdgpu_gfx_scratch_free(adev, scratch);
+
+ return r;
+}
+
+static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ uint32_t scratch;
+ uint32_t tmp = 0;
+ long r;
+
+ r = amdgpu_gfx_scratch_get(adev, &scratch);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ return r;
+ }
+
+ WREG32(scratch, 0xCAFEDEAD);
+
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+
+ ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
+ ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
+ ib.ptr[2] = 0xDEADBEEF;
+ ib.length_dw = 3;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err2;
+ }
+
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
+ } else {
+ DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
+ scratch, tmp);
+ r = -EINVAL;
+ }
+err2:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err1:
+ amdgpu_gfx_scratch_free(adev, scratch);
+
+ return r;
+}
+
+static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
+{
+ release_firmware(adev->gfx.pfp_fw);
+ adev->gfx.pfp_fw = NULL;
+ release_firmware(adev->gfx.me_fw);
+ adev->gfx.me_fw = NULL;
+ release_firmware(adev->gfx.ce_fw);
+ adev->gfx.ce_fw = NULL;
+ release_firmware(adev->gfx.rlc_fw);
+ adev->gfx.rlc_fw = NULL;
+ release_firmware(adev->gfx.mec_fw);
+ adev->gfx.mec_fw = NULL;
+ release_firmware(adev->gfx.mec2_fw);
+ adev->gfx.mec2_fw = NULL;
+
+ kfree(adev->gfx.rlc.register_list_format);
+}
+
+static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+}
+
+static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ unsigned int *tmp = NULL;
+ unsigned int i = 0;
+ uint16_t version_major;
+ uint16_t version_minor;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ chip_name = "navi10";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
+ err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
+ err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.me_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
+ err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.ce_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+ err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+ version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+ if (version_major == 2 && version_minor == 1)
+ adev->gfx.rlc.is_rlc_v2_1 = true;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->gfx.rlc.is_rlc_v2_1)
+ gfx_v10_0_init_rlc_ext_microcode(adev);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->gfx.mec_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+ err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
+ if (!err) {
+ err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
+ if (err)
+ goto out;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ } else {
+ err = 0;
+ adev->gfx.mec2_fw = NULL;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
+ info->fw = adev->gfx.pfp_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
+ info->fw = adev->gfx.me_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
+ info->fw = adev->gfx.ce_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ if (adev->gfx.rlc.is_rlc_v2_1 &&
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
+ adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
+ info->fw = adev->gfx.mec_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
+ info->fw = adev->gfx.mec_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
+
+ if (adev->gfx.mec2_fw) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ info->fw = adev->gfx.mec2_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4,
+ PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
+ info->fw = adev->gfx.mec2_fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
+ PAGE_SIZE);
+ }
+ }
+
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "gfx10: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(adev->gfx.pfp_fw);
+ adev->gfx.pfp_fw = NULL;
+ release_firmware(adev->gfx.me_fw);
+ adev->gfx.me_fw = NULL;
+ release_firmware(adev->gfx.ce_fw);
+ adev->gfx.ce_fw = NULL;
+ release_firmware(adev->gfx.rlc_fw);
+ adev->gfx.rlc_fw = NULL;
+ release_firmware(adev->gfx.mec_fw);
+ adev->gfx.mec_fw = NULL;
+ release_firmware(adev->gfx.mec2_fw);
+ adev->gfx.mec2_fw = NULL;
+ }
+
+ gfx_v10_0_check_gfxoff_flag(adev);
+
+ return err;
+}
+
+static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev)
+{
+ u32 count = 0;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+
+ /* begin clear state */
+ count += 2;
+ /* context control state */
+ count += 3;
+
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT)
+ count += 2 + ext->reg_count;
+ else
+ return 0;
+ }
+ }
+
+ /* set PA_SC_TILE_STEERING_OVERRIDE */
+ count += 3;
+ /* end clear state */
+ count += 2;
+ /* clear state */
+ count += 2;
+
+ return count;
+}
+
+static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev,
+ volatile u32 *buffer)
+{
+ u32 count = 0, i;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int ctx_reg_offset;
+
+ if (adev->gfx.rlc.cs_data == NULL)
+ return;
+ if (buffer == NULL)
+ return;
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ buffer[count++] = cpu_to_le32(0x80000000);
+ buffer[count++] = cpu_to_le32(0x80000000);
+
+ for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ buffer[count++] =
+ cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
+ buffer[count++] = cpu_to_le32(ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ buffer[count++] = cpu_to_le32(ext->extent[i]);
+ } else {
+ return;
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ buffer[count++] = cpu_to_le32(ctx_reg_offset);
+ buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
+ buffer[count++] = cpu_to_le32(0);
+}
+
+static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev)
+{
+ /* clear state block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+ &adev->gfx.rlc.clear_state_gpu_addr,
+ (void **)&adev->gfx.rlc.cs_ptr);
+
+ /* jump table block */
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+ &adev->gfx.rlc.cp_table_gpu_addr,
+ (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
+{
+ const struct cs_section_def *cs_data;
+ int r;
+
+ adev->gfx.rlc.cs_data = gfx10_cs_data;
+
+ cs_data = adev->gfx.rlc.cs_data;
+
+ if (cs_data) {
+ /* init clear state block */
+ r = amdgpu_gfx_rlc_init_csb(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (!r)
+ adev->gfx.rlc.clear_state_gpu_addr =
+ amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
+
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+
+ return r;
+}
+
+static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.rlc.clear_state_obj)
+ return;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
+ if (likely(r == 0)) {
+ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+ }
+}
+
+static void gfx_v10_0_mec_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v10_0_me_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+ amdgpu_gfx_graphics_queue_acquire(adev);
+
+ r = gfx_v10_0_init_microcode(adev);
+ if (r)
+ DRM_ERROR("Failed to load gfx firmware!\n");
+
+ return r;
+}
+
+static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
+{
+ int r;
+ u32 *hpd;
+ const __le32 *fw_data = NULL;
+ unsigned fw_size;
+ u32 *fw = NULL;
+ size_t mec_hpd_size;
+
+ const struct gfx_firmware_header_v1_0 *mec_hdr = NULL;
+
+ bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+ /* take ownership of the relevant compute queues */
+ amdgpu_gfx_compute_queue_acquire(adev);
+ mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
+
+ r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.hpd_eop_obj,
+ &adev->gfx.mec.hpd_eop_gpu_addr,
+ (void **)&hpd);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.mec.mec_fw_obj,
+ &adev->gfx.mec.mec_fw_gpu_addr,
+ (void **)&fw);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
+ gfx_v10_0_mec_fini(adev);
+ return r;
+ }
+
+ memcpy(fw, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+ }
+
+ return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (address << SQ_IND_INDEX__INDEX__SHIFT));
+ return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+ uint32_t thread, uint32_t regno,
+ uint32_t num, uint32_t *out)
+{
+ WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
+ (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+ (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+ (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+ (SQ_IND_INDEX__AUTO_INCR_MASK));
+ while (num--)
+ *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
+}
+
+static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
+{
+ /* in gfx10 the SIMD_ID is specified as part of the INSTANCE
+ * field when performing a select_se_sh so it should be
+ * zero here */
+ WARN_ON(simd != 0);
+
+ /* type 2 wave data */
+ dst[(*no_fields)++] = 2;
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+}
+
+static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t start,
+ uint32_t size, uint32_t *dst)
+{
+ WARN_ON(simd != 0);
+
+ wave_read_regs(
+ adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+ dst);
+}
+
+static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread,
+ uint32_t start, uint32_t size,
+ uint32_t *dst)
+{
+ wave_read_regs(
+ adev, wave, thread,
+ start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+
+static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v10_0_select_se_sh,
+ .read_wave_data = &gfx_v10_0_read_wave_data,
+ .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs,
+ .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,
+};
+
+static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+
+ adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+
+ adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_PIPES);
+
+ adev->gfx.config.max_tile_pipes =
+ adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+ adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
+ adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_RB_PER_SE);
+ adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
+ adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+ REG_GET_FIELD(adev->gfx.config.gb_addr_config,
+ GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
+}
+
+static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+ int me, int pipe, int queue)
+{
+ int r;
+ struct amdgpu_ring *ring;
+ unsigned int irq_type;
+
+ ring = &adev->gfx.gfx_ring[ring_id];
+
+ ring->me = me;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ if (!ring_id)
+ ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+ else
+ ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+ sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ int mec, int pipe, int queue)
+{
+ int r;
+ unsigned irq_type;
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+ ring = &adev->gfx.compute_ring[ring_id];
+
+ /* mec0 is me1 */
+ ring->me = mec + 1;
+ ring->pipe = pipe;
+ ring->queue = queue;
+
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+ ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ + (ring_id * GFX10_MEC_HPD_SIZE);
+ sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+ irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+ + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ + ring->pipe;
+
+ /* type-2 packets are deprecated on MEC, use type-3 instead */
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->gfx.eop_irq, irq_type);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int gfx_v10_0_sw_init(void *handle)
+{
+ int i, j, k, r, ring_id = 0;
+ struct amdgpu_kiq *kiq;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 2;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ default:
+ adev->gfx.me.num_me = 1;
+ adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_queue_per_pipe = 1;
+ adev->gfx.mec.num_mec = 1;
+ adev->gfx.mec.num_pipe_per_mec = 4;
+ adev->gfx.mec.num_queue_per_pipe = 8;
+ break;
+ }
+
+ /* KIQ event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT,
+ &adev->gfx.kiq.irq);
+ if (r)
+ return r;
+
+ /* EOP Event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
+ GFX_10_1__SRCID__CP_EOP_INTERRUPT,
+ &adev->gfx.eop_irq);
+ if (r)
+ return r;
+
+ /* Privileged reg */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT,
+ &adev->gfx.priv_reg_irq);
+ if (r)
+ return r;
+
+ /* Privileged inst */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT,
+ &adev->gfx.priv_inst_irq);
+ if (r)
+ return r;
+
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v10_0_scratch_init(adev);
+
+ r = gfx_v10_0_me_init(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init rlc BOs!\n");
+ return r;
+ }
+
+ r = gfx_v10_0_mec_init(adev);
+ if (r) {
+ DRM_ERROR("Failed to init MEC BOs!\n");
+ return r;
+ }
+
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.me.num_me; i++) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
+ continue;
+
+ r = gfx_v10_0_gfx_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+ ring_id++;
+ }
+ }
+ }
+
+ ring_id = 0;
+ /* set up the compute queues - allocate horizontally across pipes */
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k,
+ j))
+ continue;
+
+ r = gfx_v10_0_compute_ring_init(adev, ring_id,
+ i, k, j);
+ if (r)
+ return r;
+
+ ring_id++;
+ }
+ }
+ }
+
+ r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE);
+ if (r) {
+ DRM_ERROR("Failed to init KIQ BOs!\n");
+ return r;
+ }
+
+ kiq = &adev->gfx.kiq;
+ r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+ if (r)
+ return r;
+
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
+ if (r)
+ return r;
+
+ /* allocate visible FB for rlc auto-loading fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
+ if (r)
+ return r;
+ }
+
+ adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
+
+ gfx_v10_0_gpu_early_init(adev);
+
+ return 0;
+}
+
+static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+}
+
+static void gfx_v10_0_ce_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+}
+
+static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+}
+
+static int gfx_v10_0_sw_fini(void *handle)
+{
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+ amdgpu_gfx_mqd_sw_fini(adev);
+ amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+ amdgpu_gfx_kiq_fini(adev);
+
+ gfx_v10_0_pfp_fini(adev);
+ gfx_v10_0_ce_fini(adev);
+ gfx_v10_0_me_fini(adev);
+ gfx_v10_0_rlc_fini(adev);
+ gfx_v10_0_mec_fini(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+ gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev);
+
+ gfx_v10_0_free_microcode(adev);
+
+ return 0;
+}
+
+
+static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev)
+{
+ /* TODO */
+}
+
+static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance)
+{
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
+ instance);
+
+ if (se_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
+
+ if (sh_num == 0xffffffff)
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
+ 1);
+ else
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+}
+
+static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev)
+{
+ u32 data, mask;
+
+ data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
+
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
+ data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+
+ mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
+
+ return (~data) & mask;
+}
+
+static void gfx_v10_0_setup_rb(struct amdgpu_device *adev)
+{
+ int i, j;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ data = gfx_v10_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
+}
+
+static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev)
+{
+ uint32_t num_sc;
+ uint32_t enabled_rb_per_sh;
+ uint32_t active_rb_bitmap;
+ uint32_t num_rb_per_sc;
+ uint32_t num_packer_per_sc;
+ uint32_t pa_sc_tile_steering_override;
+
+ /* init num_sc */
+ num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se *
+ adev->gfx.config.num_sc_per_sh;
+ /* init num_rb_per_sc */
+ active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev);
+ enabled_rb_per_sh = hweight32(active_rb_bitmap);
+ num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh;
+ /* init num_packer_per_sc */
+ num_packer_per_sc = adev->gfx.config.num_packer_per_sc;
+
+ pa_sc_tile_steering_override = 0;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK;
+ pa_sc_tile_steering_override |=
+ (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) &
+ PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK;
+
+ return pa_sc_tile_steering_override;
+}
+
+#define DEFAULT_SH_MEM_BASES (0x6000)
+#define FIRST_COMPUTE_VMID (8)
+#define LAST_COMPUTE_VMID (16)
+
+static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_bases;
+
+ /*
+ * Configure apertures:
+ * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
+ * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
+ * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
+ */
+ sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+ sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+ mutex_lock(&adev->srbm_mutex);
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
+{
+ int i, j, k;
+ int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
+ u32 tmp, wgp_active_bitmap = 0;
+ u32 gcrd_targets_disable_tcp = 0;
+ u32 utcl_invreq_disable = 0;
+ /*
+ * GCRD_TARGETS_DISABLE field contains
+ * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+ */
+ u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ max_wgp_per_sh + /* SQC */
+ 4); /* GL1C */
+ /*
+ * UTCL1_UTCL0_INVREQ_DISABLE field contains
+ * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+ */
+ u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
+ 2 * max_wgp_per_sh + /* TCP */
+ 2 * max_wgp_per_sh + /* SQC */
+ 4 + /* RMI */
+ 1); /* SQG */
+
+ if (adev->asic_type == CHIP_NAVI10) {
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ /*
+ * Set corresponding TCP bits for the inactive WGPs in
+ * GCRD_SA_TARGETS_DISABLE
+ */
+ gcrd_targets_disable_tcp = 0;
+ /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+ utcl_invreq_disable = 0;
+
+ for (k = 0; k < max_wgp_per_sh; k++) {
+ if (!(wgp_active_bitmap & (1 << k))) {
+ gcrd_targets_disable_tcp |= 3 << (2 * k);
+ utcl_invreq_disable |= (3 << (2 * k)) |
+ (3 << (2 * (max_wgp_per_sh + k)));
+ }
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+ /* only override TCP & SQC bits */
+ tmp &= 0xffffffff << (4 * max_wgp_per_sh);
+ tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+ WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+ /* only override TCP bits */
+ tmp &= 0xffffffff << (2 * max_wgp_per_sh);
+ tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+ WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+ }
+ }
+
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+}
+
+static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ int i;
+
+ WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+
+ gfx_v10_0_tiling_mode_table_init(adev);
+
+ gfx_v10_0_setup_rb(adev);
+ gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
+ adev->gfx.config.pa_sc_tile_steering_override =
+ gfx_v10_0_init_pa_sc_tile_steering_override(adev);
+
+ /* XXX SH_MEM regs */
+ /* where to put LDS, scratch, GPUVM in FSA64 space */
+ mutex_lock(&adev->srbm_mutex);
+ for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
+ nv_grbm_select(adev, 0, 0, 0, i);
+ /* CP and shaders */
+ if (i == 0) {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
+ } else {
+ tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+ tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
+ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+ tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+ (adev->gmc.private_aperture_start >> 48));
+ tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+ (adev->gmc.shared_aperture_start >> 48));
+ WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+ }
+ }
+ nv_grbm_select(adev, 0, 0, 0, 0);
+
+ mutex_unlock(&adev->srbm_mutex);
+
+ gfx_v10_0_init_compute_vmid(adev);
+
+}
+
+static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
+
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
+ enable ? 1 : 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
+ enable ? 1 : 0);
+
+ WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
+}
+
+static void gfx_v10_0_init_csb(struct amdgpu_device *adev)
+{
+ /* csib */
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
+ adev->gfx.rlc.clear_state_gpu_addr >> 32);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
+ adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
+ WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+}
+
+static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
+{
+ gfx_v10_0_init_csb(adev);
+
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+
+ /* TODO: init power gating */
+ return;
+}
+
+void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
+ WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
+}
+
+static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
+{
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
+ udelay(50);
+ WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t rlc_pg_cntl;
+
+ rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL);
+
+ if (!enable) {
+ /* RLC_PG_CNTL[23] = 0 (default)
+ * RLC will wait for handshake acks with SMU
+ * GFXOFF will be enabled
+ * RLC_PG_CNTL[23] = 1
+ * RLC will not issue any message to SMU
+ * hence no handshake between SMU & RLC
+ * GFXOFF will be disabled
+ */
+ rlc_pg_cntl |= 0x80000;
+ } else
+ rlc_pg_cntl &= ~0x80000;
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
+}
+
+static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
+{
+ /* TODO: enable rlc & smu handshake until smu
+ * and gfxoff feature works as expected */
+ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
+ gfx_v10_0_rlc_smu_handshake_cntl(adev, false);
+
+ WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+ udelay(50);
+}
+
+static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* enable Save Restore Machine */
+ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
+ tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
+ tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
+}
+
+static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_0 *hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+
+ if (!adev->gfx.rlc_fw)
+ return -EINVAL;
+
+ hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(&hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
+ RLCG_UCODE_LOADING_START_ADDRESS);
+
+ for (i = 0; i < fw_size; i++)
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA,
+ le32_to_cpup(fw_data++));
+
+ WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+ gfx_v10_0_init_pg(adev);
+
+ /* enable RLC SRM */
+ gfx_v10_0_rlc_enable_srm(adev);
+
+ } else {
+ adev->gfx.rlc.funcs->stop(adev);
+
+ /* disable CG */
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
+
+ /* disable PG */
+ WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy rlc firmware loading */
+ r = gfx_v10_0_rlc_load_microcode(adev);
+ if (r)
+ return r;
+ } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ /* rlc backdoor autoload firmware */
+ r = gfx_v10_0_rlc_backdoor_autoload_enable(adev);
+ if (r)
+ return r;
+ }
+
+ gfx_v10_0_init_pg(adev);
+ adev->gfx.rlc.funcs->start(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_wait_for_rlc_autoload_complete(adev);
+ if (r)
+ return r;
+ }
+ }
+ return 0;
+}
+
+static struct {
+ FIRMWARE_ID id;
+ unsigned int offset;
+ unsigned int size;
+} rlc_autoload_info[FIRMWARE_ID_MAX];
+
+static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev)
+{
+ int ret;
+ RLC_TABLE_OF_CONTENT *rlc_toc;
+
+ ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret);
+ return ret;
+ }
+
+ /* Copy toc from psp sos fw to rlc toc buffer */
+ memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size);
+
+ rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf;
+ while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) &&
+ (rlc_toc->id < FIRMWARE_ID_MAX)) {
+ if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) &&
+ (rlc_toc->id <= FIRMWARE_ID_CP_MES)) {
+ /* Offset needs 4KB alignment */
+ rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE);
+ }
+
+ rlc_autoload_info[rlc_toc->id].id = rlc_toc->id;
+ rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4;
+ rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4;
+
+ rlc_toc++;
+ };
+
+ return 0;
+}
+
+static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+ uint32_t total_size = 0;
+ FIRMWARE_ID id;
+ int ret;
+
+ ret = gfx_v10_0_parse_rlc_toc(adev);
+ if (ret) {
+ dev_err(adev->dev, "failed to parse rlc toc\n");
+ return 0;
+ }
+
+ for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++)
+ total_size += rlc_autoload_info[id].size;
+
+ /* In case the offset in rlc toc ucode is aligned */
+ if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset)
+ total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset +
+ rlc_autoload_info[FIRMWARE_ID_MAX-1].size;
+
+ return total_size;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t total_size;
+
+ total_size = gfx_v10_0_calc_toc_total_size(adev);
+
+ r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo,
+ &adev->gfx.rlc.rlc_toc_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_toc_buf);
+ amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
+ &adev->gfx.rlc.rlc_autoload_gpu_addr,
+ (void **)&adev->gfx.rlc.rlc_autoload_ptr);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
+ FIRMWARE_ID id,
+ const void *fw_data,
+ uint32_t fw_size)
+{
+ uint32_t toc_offset;
+ uint32_t toc_fw_size;
+ char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
+
+ if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX)
+ return;
+
+ toc_offset = rlc_autoload_info[id].offset;
+ toc_fw_size = rlc_autoload_info[id].size;
+
+ if (fw_size == 0)
+ fw_size = toc_fw_size;
+
+ if (fw_size > toc_fw_size)
+ fw_size = toc_fw_size;
+
+ memcpy(ptr + toc_offset, fw_data, fw_size);
+
+ if (fw_size < toc_fw_size)
+ memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
+{
+ void *data;
+ uint32_t size;
+
+ data = adev->gfx.rlc.rlc_toc_buf;
+ size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_TOC,
+ data, size);
+}
+
+static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+
+ /* pfp ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_PFP,
+ fw_data, fw_size);
+
+ /* ce ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_CE,
+ fw_data, fw_size);
+
+ /* me ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_ME,
+ fw_data, fw_size);
+
+ /* rlc ucode */
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
+ adev->gfx.rlc_fw->data;
+ fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
+ le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_RLC_G_UCODE,
+ fw_data, fw_size);
+
+ /* mec1 ucode */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
+ le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ cp_hdr->jt_size * 4;
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_CP_MEC,
+ fw_data, fw_size);
+ /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */
+}
+
+/* Temporarily put sdma part here */
+static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ uint32_t fw_size;
+ const struct sdma_firmware_header_v1_0 *sdma_hdr;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sdma_hdr = (const struct sdma_firmware_header_v1_0 *)
+ adev->sdma.instance[i].fw->data;
+ fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes);
+
+ if (i == 0) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA0_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ } else if (i == 1) {
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size);
+ gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev,
+ FIRMWARE_ID_SDMA1_JT,
+ (uint32_t *)fw_data +
+ sdma_hdr->jt_offset,
+ sdma_hdr->jt_size * 4);
+ }
+ }
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
+{
+ uint32_t rlc_g_offset, rlc_g_size, tmp;
+ uint64_t gpu_addr;
+
+ gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev);
+ gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev);
+
+ rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset;
+ rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size;
+ gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
+
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr));
+ WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size);
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR);
+ if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK |
+ RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) {
+ DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) {
+ DRM_ERROR("RLC ROM should halt itself\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program me ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_ME].offset;
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program ce ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_CE].offset;
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program pfp ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset;
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev)
+{
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+ uint32_t tmp;
+ int i;
+ uint64_t addr;
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ /* Program mec1 ucode address into intruction cache address register */
+ addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
+ rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset;
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
+ lower_32_bits(addr) & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
+{
+ uint32_t cp_status;
+ uint32_t bootload_status;
+ int i, r;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ cp_status = RREG32_SOC15(GC, 0, mmCP_STAT);
+ bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS);
+ if ((cp_status == 0) &&
+ (REG_GET_FIELD(bootload_status,
+ RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
+ break;
+ }
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
+ r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+ u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+ tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
+ if (!enable) {
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].sched.ready = false;
+ }
+ WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+ udelay(50);
+}
+
+static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *pfp_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
+ le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.pfp.pfp_fw_obj,
+ &adev->gfx.pfp.pfp_fw_gpu_addr,
+ (void **)&adev->gfx.pfp.pfp_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
+ gfx_v10_0_pfp_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO,
+ adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI,
+ upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *ce_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ ce_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.ce_fw->data +
+ le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.ce.ce_fw_obj,
+ &adev->gfx.ce.ce_fw_gpu_addr,
+ (void **)&adev->gfx.ce.ce_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r);
+ gfx_v10_0_ce_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO,
+ adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI,
+ upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ const struct gfx_firmware_header_v1_0 *me_hdr;
+ const __le32 *fw_data;
+ unsigned i, fw_size;
+ uint32_t tmp;
+ uint32_t usec_timeout = 50000; /* wait for 50ms */
+
+ me_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+
+ amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+
+ fw_data = (const __le32 *)(adev->gfx.me_fw->data +
+ le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
+ fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->gfx.me.me_fw_obj,
+ &adev->gfx.me.me_fw_gpu_addr,
+ (void **)&adev->gfx.me.me_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
+ gfx_v10_0_me_fini(adev);
+ return r;
+ }
+
+ memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
+ amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO,
+ adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI,
+ upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_ce_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load ce fw\n", r);
+ return r;
+ }
+
+ r = gfx_v10_0_cp_gfx_load_me_microcode(adev);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to load me fw\n", r);
+ return r;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ const struct cs_section_def *sect = NULL;
+ const struct cs_extent_def *ext = NULL;
+ int r, i;
+ int ctx_reg_offset;
+
+ /* init the CP */
+ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
+ adev->gfx.config.max_hw_contexts - 1);
+ WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
+
+ gfx_v10_0_cp_gfx_enable(adev, true);
+
+ ring = &adev->gfx.gfx_ring[0];
+ r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, 0x80000000);
+ amdgpu_ring_write(ring, 0x80000000);
+
+ for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
+ for (ext = sect->section; ext->extent != NULL; ++ext) {
+ if (sect->id == SECT_CONTEXT) {
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG,
+ ext->reg_count));
+ amdgpu_ring_write(ring, ext->reg_index -
+ PACKET3_SET_CONTEXT_REG_START);
+ for (i = 0; i < ext->reg_count; i++)
+ amdgpu_ring_write(ring, ext->extent[i]);
+ }
+ }
+ }
+
+ ctx_reg_offset =
+ SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
+ amdgpu_ring_write(ring, ctx_reg_offset);
+ amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+ amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+ amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+ amdgpu_ring_write(ring, 0x8000);
+ amdgpu_ring_write(ring, 0x8000);
+
+ amdgpu_ring_commit(ring);
+
+ /* submit cs packet to copy state 0 to next available state */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_commit(ring);
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
+ CP_PIPE_ID pipe)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
+
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp);
+}
+
+static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
+ tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
+ DOORBELL_RANGE_LOWER, ring->doorbell_index);
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
+ CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
+}
+
+static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 tmp;
+ u32 rb_bufsz;
+ u64 rb_addr, rptr_addr, wptr_gpu_addr;
+ u32 i;
+
+ /* Set the write pointer delay */
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
+
+ /* set the RB to use vmid 0 */
+ WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
+
+ /* Init gfx ring 0 for pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+ /* Set ring buffer size */
+ ring = &adev->gfx.gfx_ring[0];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
+#endif
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+
+ /* set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+ WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ /* Init gfx ring 1 for pipe 1 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ mutex_unlock(&adev->srbm_mutex);
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+
+ /* Switch to pipe 0 */
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ /* start the ring */
+ gfx_v10_0_cp_gfx_start(adev);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->sched.ready = true;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
+{
+ int i;
+
+ if (enable) {
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].sched.ready = false;
+ adev->gfx.kiq.ring.sched.ready = false;
+ }
+ udelay(50);
+}
+
+static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
+{
+ const struct gfx_firmware_header_v1_0 *mec_hdr;
+ const __le32 *fw_data;
+ unsigned i;
+ u32 tmp;
+ u32 usec_timeout = 50000; /* Wait for 50 ms */
+
+ if (!adev->gfx.mec_fw)
+ return -EINVAL;
+
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+
+ /* Trigger an invalidation of the L1 instruction caches */
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp);
+
+ /* Wait for invalidation complete */
+ for (i = 0; i < usec_timeout; i++) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL);
+ if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
+ INVALIDATE_CACHE_COMPLETE))
+ break;
+ udelay(1);
+ }
+
+ if (i >= usec_timeout) {
+ dev_err(adev->dev, "failed to invalidate instruction cache\n");
+ return -EINVAL;
+ }
+
+ if (amdgpu_emu_mode == 1)
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr &
+ 0xFFFFF000);
+ WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
+ upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
+
+ /* MEC1 */
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0);
+
+ for (i = 0; i < mec_hdr->jt_size; i++)
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
+ le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
+
+ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
+
+ /*
+ * TODO: Loading MEC2 firmware is only necessary if MEC2 should run
+ * different microcode than MEC1.
+ */
+
+ return 0;
+}
+
+static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
+{
+ uint32_t tmp;
+ struct amdgpu_device *adev = ring->adev;
+
+ /* tell RLC which is KIQ queue */
+ tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+ tmp &= 0xffffff00;
+ tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ tmp |= 0x80;
+ WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+}
+
+static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr;
+ uint32_t tmp;
+ uint32_t rb_bufsz;
+
+ /* set up gfx hqd wptr */
+ mqd->cp_gfx_hqd_wptr = 0;
+ mqd->cp_gfx_hqd_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set up mqd control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
+ mqd->cp_gfx_mqd_control = tmp;
+
+ /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
+ mqd->cp_gfx_hqd_vmid = 0;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+
+ /* set up time quantum */
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
+ mqd->cp_gfx_hqd_quantum = tmp;
+
+ /* set up gfx hqd base. this is similar as CP_RB_BASE */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_gfx_hqd_base = hqd_gpu_addr;
+ mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_gfx_hqd_rptr_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up rb_wptr_poll addr */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
+ rb_bufsz = order_base_2(ring->ring_size / 4) - 1;
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
+#endif
+ mqd->cp_gfx_hqd_cntl = tmp;
+
+ /* set up cp_doorbell_control */
+ tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ } else
+ tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ mqd->cp_rb_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR);
+
+ /* active the queue */
+ mqd->cp_gfx_hqd_active = 1;
+
+ return 0;
+}
+
+#ifdef BRING_UP_DEBUG
+static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+
+ /* set mmCP_GFX_HQD_WPTR/_HI to 0 */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr);
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi);
+
+ /* set GFX_MQD_BASE */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+
+ /* set GFX_MQD_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control);
+
+ /* set GFX_HQD_VMID to 0 */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY,
+ mqd->cp_gfx_hqd_queue_priority);
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum);
+
+ /* set GFX_HQD_BASE, similar as CP_RB_BASE */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base);
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi);
+
+ /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr);
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi);
+
+ /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl);
+
+ /* set RB_WPTR_POLL_ADDR */
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi);
+
+ /* set RB_DOORBELL_CONTROL */
+ WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control);
+
+ /* active the queue */
+ WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active);
+
+ return 0;
+}
+#endif
+
+static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_gfx_mqd *mqd = ring->mqd_ptr;
+
+ if (!adev->in_gpu_reset && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_gfx_mqd_init(ring);
+#ifdef BRING_UP_DEBUG
+ gfx_v10_0_gfx_queue_init_register(ring);
+#endif
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
+ memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd));
+ } else if (adev->in_gpu_reset) {
+ /* reset mqd with the backup copy */
+ if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS])
+ memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd));
+ /* reset the ring */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+#ifdef BRING_UP_DEBUG
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_gfx_queue_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+#endif
+ } else {
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+#ifndef BRING_UP_DEBUG
+static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+ int r, i;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
+ return -EINVAL;
+
+ r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
+ adev->gfx.num_gfx_rings);
+ if (r) {
+ DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
+
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("kfq enable failed\n");
+ kiq_ring->sched.ready = false;
+ }
+ return r;
+}
+#endif
+
+static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v10_0_gfx_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+#ifndef BRING_UP_DEBUG
+ r = gfx_v10_0_kiq_enable_kgq(adev);
+ if (r)
+ goto done;
+#endif
+ r = gfx_v10_0_cp_gfx_start(adev);
+ if (r)
+ goto done;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->sched.ready = true;
+ }
+done:
+ return r;
+}
+
+static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
+ uint32_t tmp;
+
+ mqd->header = 0xC0310800;
+ mqd->compute_pipelinestat_enable = 0x00000001;
+ mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+ mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
+ mqd->compute_misc_reserved = 0x00000003;
+
+ eop_base_addr = ring->eop_gpu_addr >> 8;
+ mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
+ mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
+ (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1));
+
+ mqd->cp_hqd_eop_control = tmp;
+
+ /* enable doorbell? */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+
+ if (ring->use_doorbell) {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ } else {
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* disable the queue if it's active */
+ ring->wptr = 0;
+ mqd->cp_hqd_dequeue_request = 0;
+ mqd->cp_hqd_pq_rptr = 0;
+ mqd->cp_hqd_pq_wptr_lo = 0;
+ mqd->cp_hqd_pq_wptr_hi = 0;
+
+ /* set the pointer to the MQD */
+ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
+ mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
+
+ /* set MQD vmid to 0 */
+ tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
+ mqd->cp_mqd_control = tmp;
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ hqd_gpu_addr = ring->gpu_addr >> 8;
+ mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+ mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
+ (order_base_2(ring->ring_size / 4) - 1));
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
+ ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
+#ifdef __BIG_ENDIAN
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
+#endif
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+ mqd->cp_hqd_pq_control = tmp;
+
+ /* set the wb address whether it's enabled or not */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_rptr_report_addr_hi =
+ upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+ mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+ tmp = 0;
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_OFFSET, ring->doorbell_index);
+
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 1);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_SOURCE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 0);
+ }
+
+ mqd->cp_hqd_pq_doorbell_control = tmp;
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ ring->wptr = 0;
+ mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
+
+ /* set the vmid for the queue */
+ mqd->cp_hqd_vmid = 0;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
+ mqd->cp_hqd_persistent_state = tmp;
+
+ /* set MIN_IB_AVAIL_SIZE */
+ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
+ tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
+ mqd->cp_hqd_ib_control = tmp;
+
+ /* activate the queue */
+ mqd->cp_hqd_active = 1;
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int j;
+
+ /* disable wptr polling */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+
+ /* write the EOP addr */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ mqd->cp_hqd_eop_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ mqd->cp_hqd_eop_base_addr_hi);
+
+ /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+ WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ mqd->cp_hqd_eop_control);
+
+ /* enable doorbell? */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ mqd->cp_hqd_dequeue_request);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+ mqd->cp_hqd_pq_rptr);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+ }
+
+ /* set the pointer to the MQD */
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+ mqd->cp_mqd_base_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ mqd->cp_mqd_base_addr_hi);
+
+ /* set MQD vmid to 0 */
+ WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+ mqd->cp_mqd_control);
+
+ /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+ mqd->cp_hqd_pq_base_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ mqd->cp_hqd_pq_base_hi);
+
+ /* set up the HQD, this is similar to CP_RB0_CNTL */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+ mqd->cp_hqd_pq_control);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ mqd->cp_hqd_pq_rptr_report_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ mqd->cp_hqd_pq_rptr_report_addr_hi);
+
+ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ mqd->cp_hqd_pq_wptr_poll_addr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ mqd->cp_hqd_pq_wptr_poll_addr_hi);
+
+ /* enable the doorbell if requested */
+ if (ring->use_doorbell) {
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
+ (adev->doorbell_index.kiq * 2) << 2);
+ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
+ (adev->doorbell_index.userqueue_end * 2) << 2);
+ }
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ mqd->cp_hqd_pq_doorbell_control);
+
+ /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ mqd->cp_hqd_pq_wptr_lo);
+ WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ mqd->cp_hqd_pq_wptr_hi);
+
+ /* set the vmid for the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ mqd->cp_hqd_persistent_state);
+
+ /* activate the queue */
+ WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+ mqd->cp_hqd_active);
+
+ if (ring->use_doorbell)
+ WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
+
+ gfx_v10_0_kiq_setting(ring);
+
+ if (adev->in_gpu_reset) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+ } else {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_compute_mqd_init(ring);
+ gfx_v10_0_kiq_init_register(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_compute_mqd *mqd = ring->mqd_ptr;
+ int mqd_idx = ring - &adev->gfx.compute_ring[0];
+
+ if (!adev->in_gpu_reset && !adev->in_suspend) {
+ memset((void *)mqd, 0, sizeof(*mqd));
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+ gfx_v10_0_compute_mqd_init(ring);
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
+ } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
+ /* reset MQD to a clean status */
+ if (adev->gfx.mec.mqd_backup[mqd_idx])
+ memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
+
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
+ } else {
+ amdgpu_ring_clear_ring(ring);
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ int r;
+
+ ring = &adev->gfx.kiq.ring;
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (unlikely(r != 0))
+ return r;
+
+ gfx_v10_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ ring->sched.ready = true;
+ return 0;
+}
+
+static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v10_0_cp_compute_enable(adev, true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+
+ r = amdgpu_bo_reserve(ring->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto done;
+ r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
+ if (!r) {
+ r = gfx_v10_0_kcq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
+ }
+ amdgpu_bo_unreserve(ring->mqd_obj);
+ if (r)
+ goto done;
+ }
+
+ r = amdgpu_gfx_enable_kcq(adev);
+done:
+ return r;
+}
+
+static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
+{
+ int r, i;
+ struct amdgpu_ring *ring;
+
+ if (!(adev->flags & AMD_IS_APU))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /* legacy firmware loading */
+ r = gfx_v10_0_cp_gfx_load_microcode(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_cp_compute_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ r = gfx_v10_0_kiq_resume(adev);
+ if (r)
+ return r;
+
+ r = gfx_v10_0_kcq_resume(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_async_gfx_ring) {
+ r = gfx_v10_0_cp_gfx_resume(adev);
+ if (r)
+ return r;
+ } else {
+ r = gfx_v10_0_cp_async_gfx_ring_resume(adev);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ DRM_INFO("gfx %d ring me %d pipe %d q %d\n",
+ i, ring->me, ring->pipe, ring->queue);
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ return r;
+ }
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ ring->sched.ready = true;
+ DRM_INFO("compute ring %d mec %d pipe %d q %d\n",
+ i, ring->me, ring->pipe, ring->queue);
+ r = amdgpu_ring_test_ring(ring);
+ if (r)
+ ring->sched.ready = false;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable)
+{
+ gfx_v10_0_cp_gfx_enable(adev, enable);
+ gfx_v10_0_cp_compute_enable(adev, enable);
+}
+
+static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data, pattern = 0xDEADBEEF;
+
+ /* check if mmVGT_ESGS_RING_SIZE_UMD
+ * has been remapped to mmVGT_ESGS_RING_SIZE */
+ data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);
+
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0);
+
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern);
+
+ if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data);
+ return true;
+ } else {
+ WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data);
+ return false;
+ }
+}
+
+static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* initialize cam_index to 0
+ * index will auto-inc after each data writting */
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0);
+
+ /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */
+ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+
+ /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */
+ data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) <<
+ GRBM_CAM_DATA__CAM_ADDR__SHIFT) |
+ (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) <<
+ GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0);
+ WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);
+}
+
+static int gfx_v10_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gfx_v10_0_csb_vram_pin(adev);
+ if (r)
+ return r;
+
+ if (!amdgpu_emu_mode)
+ gfx_v10_0_init_golden_registers(adev);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ /**
+ * For gfx 10, rlc firmware loading relies on smu firmware is
+ * loaded firstly, so in direct type, it has to load smc ucode
+ * here before rlc.
+ */
+ r = smu_load_microcode(&adev->smu);
+ if (r)
+ return r;
+
+ r = smu_check_fw_status(&adev->smu);
+ if (r) {
+ pr_err("SMC firmware status is not correct\n");
+ return r;
+ }
+ }
+
+ /* if GRBM CAM not remapped, set up the remapping */
+ if (!gfx_v10_0_check_grbm_cam_remapping(adev))
+ gfx_v10_0_setup_grbm_cam_remapping(adev);
+
+ gfx_v10_0_constants_init(adev);
+
+ r = gfx_v10_0_rlc_resume(adev);
+ if (r)
+ return r;
+
+ /*
+ * init golden registers and rlc resume may override some registers,
+ * reconfig them here
+ */
+ gfx_v10_0_tcp_harvest(adev);
+
+ r = gfx_v10_0_cp_resume(adev);
+ if (r)
+ return r;
+
+ return r;
+}
+
+#ifndef BRING_UP_DEBUG
+static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
+{
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+ int i;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
+ adev->gfx.num_gfx_rings))
+ return -ENOMEM;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
+ PREEMPT_QUEUES, 0, 0);
+
+ return amdgpu_ring_test_ring(kiq_ring);
+}
+#endif
+
+static int gfx_v10_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+#ifndef BRING_UP_DEBUG
+ if (amdgpu_async_gfx_ring) {
+ r = gfx_v10_0_kiq_disable_kgq(adev);
+ if (r)
+ DRM_ERROR("KGQ disable failed\n");
+ }
+#endif
+ if (amdgpu_gfx_disable_kcq(adev))
+ DRM_ERROR("KCQ disable failed\n");
+ if (amdgpu_sriov_vf(adev)) {
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+ gfx_v10_0_cp_enable(adev, false);
+ gfx_v10_0_enable_gui_idle_interrupt(adev, false);
+ gfx_v10_0_csb_vram_unpin(adev);
+
+ return 0;
+}
+
+static int gfx_v10_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->in_suspend = true;
+ return gfx_v10_0_hw_fini(adev);
+}
+
+static int gfx_v10_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = gfx_v10_0_hw_init(adev);
+ adev->in_suspend = false;
+ return r;
+}
+
+static bool gfx_v10_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
+ GRBM_STATUS, GUI_ACTIVE))
+ return false;
+ else
+ return true;
+}
+
+static int gfx_v10_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ /* read MC_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) &
+ GRBM_STATUS__GUI_ACTIVE_MASK;
+
+ if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int gfx_v10_0_soft_reset(void *handle)
+{
+ u32 grbm_soft_reset = 0;
+ u32 tmp;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* GRBM_STATUS */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
+ if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
+ GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
+ GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
+ GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
+ GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK
+ | GRBM_STATUS__BCI_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_GFX,
+ 1);
+ }
+
+ if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_CP,
+ 1);
+ }
+
+ /* GRBM_STATUS2 */
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
+ if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
+ GRBM_SOFT_RESET, SOFT_RESET_RLC,
+ 1);
+
+ if (grbm_soft_reset) {
+ /* stop the rlc */
+ gfx_v10_0_rlc_stop(adev);
+
+ /* Disable GFX parsing/prefetching */
+ gfx_v10_0_cp_gfx_enable(adev, false);
+
+ /* Disable MEC parsing/prefetching */
+ gfx_v10_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset) {
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ tmp |= grbm_soft_reset;
+ dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+ udelay(50);
+
+ tmp &= ~grbm_soft_reset;
+ WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+ tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+ }
+
+ /* Wait a little for things to settle down */
+ udelay(50);
+ }
+ return 0;
+}
+
+static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ uint64_t clock;
+
+ mutex_lock(&adev->gfx.gpu_clock_mutex);
+ WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+ clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+ ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ return clock;
+}
+
+static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* GDS Base */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
+ gds_base);
+
+ /* GDS Size */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
+ gds_size);
+
+ /* GWS */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
+ gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
+
+ /* OA */
+ gfx_v10_0_write_data_to_reg(ring, 0, false,
+ SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
+ (1 << (oa_size + oa_base)) - (1 << oa_base));
+}
+
+static int gfx_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
+ adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+
+ gfx_v10_0_set_kiq_pm4_funcs(adev);
+ gfx_v10_0_set_ring_funcs(adev);
+ gfx_v10_0_set_irq_funcs(adev);
+ gfx_v10_0_set_gds_init(adev);
+ gfx_v10_0_set_rlc_funcs(adev);
+
+ return 0;
+}
+
+static int gfx_v10_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev)
+{
+ uint32_t rlc_cntl;
+
+ /* if RLC is not enabled, do nothing */
+ rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL);
+ return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
+}
+
+static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ unsigned i;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+
+ /* wait for RLC_SAFE_MODE */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ break;
+ udelay(1);
+ }
+}
+
+static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ data = RLC_SAFE_MODE__CMD_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
+}
+
+static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ /* It is disabled by HW by default */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 1 - RLC_CGTT_MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
+ /* only for Vega10 & Raven1 */
+ data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* MGLS is a global flag to control all MGLS in GFX */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ /* 2 - RLC memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+ /* 3 - CP memory Light sleep */
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
+ def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+ } else {
+ /* 1 - MGCG_OVERRIDE */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
+ RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* 2 - disable MGLS in RLC */
+ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
+ data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+ }
+}
+
+static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+
+ /* Enable 3D CGCG/CGLS */
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+ /* write cmd to clear cgcg/cgls ov */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+ /* enable 3Dcgcg FSM(0x0000363f) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ /* Disable CGCG/CGLS */
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ /* disable cgcg, cgls should be disabled */
+ data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
+ RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
+ }
+}
+
+static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ /* unset CGCG override */
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ else
+ data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+ /* update CGCG and CGLS override bits */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
+ /* enable cgcg FSM(0x0000363F) */
+ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+ data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
+ RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+
+ /* set IDLE_POLL_COUNT(0x00900100) */
+ def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
+ data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
+ (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
+ } else {
+ def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ /* reset CGCG/CGLS bits */
+ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+ /* disable cgcg and cgls in FSM */
+ if (def != data)
+ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
+ }
+}
+
+static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+
+ if (enable) {
+ /* CGCG/CGLS should be enabled after MGCG/MGLS
+ * === MGCG + MGLS ===
+ */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === CGCG + CGLS === */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+ } else {
+ /* CGCG/CGLS should be disabled before MGCG/MGLS
+ * === CGCG + CGLS ===
+ */
+ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+ /* === CGCG /CGLS for GFX 3D Only === */
+ gfx_v10_0_update_3d_clock_gating(adev, enable);
+ /* === MGCG + MGLS === */
+ gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ }
+
+ if (adev->cg_flags &
+ (AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS))
+ gfx_v10_0_enable_gui_idle_interrupt(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+
+ return 0;
+}
+
+static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
+ .is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
+ .set_safe_mode = gfx_v10_0_set_safe_mode,
+ .unset_safe_mode = gfx_v10_0_unset_safe_mode,
+ .init = gfx_v10_0_rlc_init,
+ .get_csb_size = gfx_v10_0_get_csb_size,
+ .get_csb_buffer = gfx_v10_0_get_csb_buffer,
+ .resume = gfx_v10_0_rlc_resume,
+ .stop = gfx_v10_0_rlc_stop,
+ .reset = gfx_v10_0_rlc_reset,
+ .start = gfx_v10_0_rlc_start
+};
+
+static int gfx_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ if (!enable) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+ } else
+ amdgpu_gfx_off_ctrl(adev, true);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ gfx_v10_0_update_gfx_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ /* AMD_CG_SUPPORT_GFX_MGCG */
+ data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+ if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
+ *flags |= AMD_CG_SUPPORT_GFX_MGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGCG */
+ data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+ if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CGLS;
+
+ /* AMD_CG_SUPPORT_GFX_RLC_LS */
+ data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
+ if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_CP_LS */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGCG */
+ data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
+
+ /* AMD_CG_SUPPORT_GFX_3D_CGLS */
+ if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
+}
+
+static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/
+}
+
+static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
+ } else {
+ wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
+ wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
+ }
+
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ }
+}
+
+static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
+{
+ return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */
+}
+
+static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
+{
+ u64 wptr;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell)
+ wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
+ else
+ BUG();
+ return wptr;
+}
+
+static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx10 now */
+ }
+}
+
+static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask, reg_mem_engine;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ switch (ring->me) {
+ case 1:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
+ break;
+ case 2:
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
+ break;
+ default:
+ return;
+ }
+ reg_mem_engine = 0;
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ reg_mem_engine = 1; /* pfp */
+ }
+
+ gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
+ adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+ adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+ ref_and_mask, ref_and_mask, 0x20);
+}
+
+static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 header, control = 0;
+
+ /* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS.
+ * This resets the wave ID counters. (needed by transform feedback)
+ * TODO: This might only be needed on a VMID switch when we change
+ * the GDS OA mapping, not sure.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id);
+
+ if (ib->flags & AMDGPU_IB_FLAG_CE)
+ header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
+ else
+ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+
+ control |= ib->length_dw | (vmid << 24);
+
+ if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ control |= INDIRECT_BUFFER_PRE_ENB(1);
+
+ if (flags & AMDGPU_IB_PREEMPTED)
+ control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+ gfx_v10_0_ring_emit_de_meta(ring,
+ flags & AMDGPU_IB_PREEMPTED ? true : false);
+ }
+
+ amdgpu_ring_write(ring, header);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+
+ /* Currently, there is a high possibility to get wave ID mismatch
+ * between ME and GDS, leading to a hw deadlock, because ME generates
+ * different wave IDs than the GDS expects. This situation happens
+ * randomly when at least 5 compute pipes use GDS ordered append.
+ * The wave IDs generated by ME are also wrong after suspend/resume.
+ * Those are probably bugs somewhere else in the kernel driver.
+ *
+ * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+ * GDS to 0 for this ring (me/pipe).
+ */
+ if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+ amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+ amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+ amdgpu_ring_write(ring,
+#ifdef __BIG_ENDIAN
+ (2 << 0) |
+#endif
+ lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, control);
+}
+
+static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+
+ /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
+ if (adev->pdev->device == 0x50)
+ int_sel = false;
+
+ /* RELEASE_MEM - flush caches, send int */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
+
+ /*
+ * the address should be Qword aligned if 64bit write, Dword
+ * aligned if only send 32bit data low (discard data high)
+ */
+ if (write64bit)
+ BUG_ON(addr & 0x7);
+ else
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
+ upper_32_bits(addr), seq, 0xffffffff, 4);
+}
+
+static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* compute doesn't have PFP */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+ }
+}
+
+static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned int flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ /* we only allocate 32bit for each seq wb address */
+ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ /* write fence seq to the "addr" */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ if (flags & AMDGPU_FENCE_FLAG_INT) {
+ /* set register to trigger INT */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
+ amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ }
+}
+
+static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+}
+
+static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ if (amdgpu_mcbp)
+ gfx_v10_0_ring_emit_ce_meta(ring,
+ flags & AMDGPU_IB_PREEMPTED ? true : false);
+
+ gfx_v10_0_ring_emit_tmz(ring, true);
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
+static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+ ret = ring->wptr & ring->buf_mask;
+ amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+
+ return ret;
+}
+
+static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+ unsigned cur;
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (likely(cur > offset))
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *kiq_ring = &kiq->ring;
+
+ if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+ return -EINVAL;
+
+ if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
+ return -ENOMEM;
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* assert IB preemption, emit the trailing fence */
+ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+ ring->trail_fence_gpu_addr,
+ ++ring->trail_seq);
+ amdgpu_ring_commit(kiq_ring);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
+ }
+
+ /* deassert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_ce_ib_state ce_payload = {0};
+ uint64_t csa_addr;
+ int cnt;
+
+ cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+ offsetof(struct v10_gfx_meta_data, ce_payload)));
+ amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+ offsetof(struct v10_gfx_meta_data, ce_payload)));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
+ offsetof(struct v10_gfx_meta_data,
+ ce_payload),
+ sizeof(ce_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+ sizeof(ce_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct v10_de_ib_state de_payload = {0};
+ uint64_t csa_addr, gds_addr;
+ int cnt;
+
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
+ gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size,
+ PAGE_SIZE);
+ de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
+ de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
+
+ cnt = (sizeof(de_payload) >> 2) + 4 - 2;
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
+ WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) |
+ WRITE_DATA_CACHE_POLICY(0));
+ amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+ offsetof(struct v10_gfx_meta_data, de_payload)));
+ amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+ offsetof(struct v10_gfx_meta_data, de_payload)));
+
+ if (resume)
+ amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr +
+ offsetof(struct v10_gfx_meta_data,
+ de_payload),
+ sizeof(de_payload) >> 2);
+ else
+ amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+ sizeof(de_payload) >> 2);
+}
+
+static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+ amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
+}
+
+static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
+ amdgpu_ring_write(ring, 0 | /* src: register*/
+ (5 << 8) | /* dst: memory */
+ (1 << 20)); /* write confirm */
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
+ adev->virt.reg_val_offs * 4));
+ amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
+ adev->virt.reg_val_offs * 4));
+}
+
+static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val)
+{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, cmd);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
+static void
+gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t cp_int_cntl, cp_int_cntl_reg;
+
+ if (!me) {
+ switch (pipe) {
+ case 0:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0);
+ break;
+ case 1:
+ cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ cp_int_cntl = RREG32(cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(cp_int_cntl_reg, cp_int_cntl);
+ case AMDGPU_IRQ_STATE_ENABLE:
+ cp_int_cntl = RREG32(cp_int_cntl_reg);
+ cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(cp_int_cntl_reg, cp_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
+ int me, int pipe,
+ enum amdgpu_interrupt_state state)
+{
+ u32 mec_int_cntl, mec_int_cntl_reg;
+
+ /*
+ * amdgpu controls only the first MEC. That's why this function only
+ * handles the setting of interrupts for this specific MEC. All other
+ * pipes' interrupts are set by amdkfd.
+ */
+
+ if (me == 1) {
+ switch (pipe) {
+ case 0:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ break;
+ case 1:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
+ break;
+ case 2:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
+ break;
+ case 3:
+ mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
+ break;
+ default:
+ DRM_DEBUG("invalid pipe %d\n", pipe);
+ return;
+ }
+ } else {
+ DRM_DEBUG("invalid me %d\n", me);
+ return;
+ }
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 0);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ mec_int_cntl = RREG32(mec_int_cntl_reg);
+ mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
+ TIME_STAMP_INT_ENABLE, 1);
+ WREG32(mec_int_cntl_reg, mec_int_cntl);
+ break;
+ default:
+ break;
+ }
+}
+
+static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (type) {
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
+ gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
+ break;
+ case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
+ gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ int i;
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+
+ DRM_DEBUG("IH: CP EOP\n");
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ if (pipe_id == 0)
+ amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+ else
+ amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ /* Per-queue interrupt is supported for MEC starting from VI.
+ * The interrupt can only be enabled/disabled per pipe instead of per queue.
+ */
+ if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
+ amdgpu_fence_process(ring);
+ }
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_REG_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ case AMDGPU_IRQ_STATE_ENABLE:
+ WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+ PRIV_INSTR_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring;
+ int i;
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+
+ switch (me_id) {
+ case 0:
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ /* we only enabled 1 gfx queue per pipe for now */
+ if (ring->me == me_id && ring->pipe == pipe_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ case 1:
+ case 2:
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ if (ring->me == me_id && ring->pipe == pipe_id &&
+ ring->queue == queue_id)
+ drm_sched_fault(&ring->sched);
+ }
+ break;
+ default:
+ BUG();
+ }
+}
+
+static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal register access in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_ERROR("Illegal instruction in command stream\n");
+ gfx_v10_0_handle_priv_fault(adev, entry);
+ return 0;
+}
+
+static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ uint32_t tmp, target;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+
+ if (ring->me == 1)
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
+ else
+ target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
+ target += ring->pipe;
+
+ switch (type) {
+ case AMDGPU_CP_KIQ_IRQ_DRIVER0:
+ if (state == AMDGPU_IRQ_STATE_DISABLE) {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32(target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 0);
+ WREG32(target, tmp);
+ } else {
+ tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
+
+ tmp = RREG32(target);
+ tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
+ GENERIC2_INT_ENABLE, 1);
+ WREG32(target, tmp);
+ }
+ break;
+ default:
+ BUG(); /* kiq only support GENERIC2_INT now */
+ break;
+ }
+ return 0;
+}
+
+static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ u8 me_id, pipe_id, queue_id;
+ struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+
+ me_id = (entry->ring_id & 0x0c) >> 2;
+ pipe_id = (entry->ring_id & 0x03) >> 0;
+ queue_id = (entry->ring_id & 0x70) >> 4;
+ DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
+ me_id, pipe_id, queue_id);
+
+ amdgpu_fence_process(ring);
+ return 0;
+}
+
+static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
+ .name = "gfx_v10_0",
+ .early_init = gfx_v10_0_early_init,
+ .late_init = gfx_v10_0_late_init,
+ .sw_init = gfx_v10_0_sw_init,
+ .sw_fini = gfx_v10_0_sw_fini,
+ .hw_init = gfx_v10_0_hw_init,
+ .hw_fini = gfx_v10_0_hw_fini,
+ .suspend = gfx_v10_0_suspend,
+ .resume = gfx_v10_0_resume,
+ .is_idle = gfx_v10_0_is_idle,
+ .wait_for_idle = gfx_v10_0_wait_for_idle,
+ .soft_reset = gfx_v10_0_soft_reset,
+ .set_clockgating_state = gfx_v10_0_set_clockgating_state,
+ .set_powergating_state = gfx_v10_0_set_powergating_state,
+ .get_clockgating_state = gfx_v10_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
+ .type = AMDGPU_RING_TYPE_GFX,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_GFXHUB,
+ .get_rptr = gfx_v10_0_ring_get_rptr_gfx,
+ .get_wptr = gfx_v10_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v10_0_ring_set_wptr_gfx,
+ .emit_frame_size = /* totally 242 maximum if 16 IBs */
+ 5 + /* COND_EXEC */
+ 7 + /* PIPELINE_SYNC */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
+ 8 + /* FENCE for VM_FLUSH */
+ 20 + /* GDS switch */
+ 4 + /* double SWITCH_BUFFER,
+ * the first COND_EXEC jump to the place
+ * just prior to this double SWITCH_BUFFER
+ */
+ 5 + /* COND_EXEC */
+ 7 + /* HDP_flush */
+ 4 + /* VGT_flush */
+ 14 + /* CE_META */
+ 31 + /* DE_META */
+ 3 + /* CNTX_CTRL */
+ 5 + /* HDP_INVL */
+ 8 + 8 + /* FENCE x2 */
+ 2, /* SWITCH_BUFFER */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_gfx */
+ .emit_ib = gfx_v10_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_switch_buffer = gfx_v10_0_ring_emit_sb,
+ .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
+ .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
+ .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
+ .preempt_ib = gfx_v10_0_ring_preempt_ib,
+ .emit_tmz = gfx_v10_0_ring_emit_tmz,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
+ .type = AMDGPU_RING_TYPE_COMPUTE,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_GFXHUB,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /* hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v10_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence,
+ .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush,
+ .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch,
+ .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+};
+
+static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+ .type = AMDGPU_RING_TYPE_KIQ,
+ .align_mask = 0xff,
+ .nop = PACKET3(PACKET3_NOP, 0x3FFF),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_GFXHUB,
+ .get_rptr = gfx_v10_0_ring_get_rptr_compute,
+ .get_wptr = gfx_v10_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v10_0_ring_set_wptr_compute,
+ .emit_frame_size =
+ 20 + /* gfx_v10_0_ring_emit_gds_switch */
+ 7 + /* gfx_v10_0_ring_emit_hdp_flush */
+ 5 + /*hdp invalidate */
+ 7 + /* gfx_v10_0_ring_emit_pipeline_sync */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v10_0_ring_emit_vm_flush */
+ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
+ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
+ .emit_ib = gfx_v10_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v10_0_ring_emit_fence_kiq,
+ .test_ring = gfx_v10_0_ring_test_ring,
+ .test_ib = gfx_v10_0_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_rreg = gfx_v10_0_ring_emit_rreg,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+};
+
+static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq;
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx;
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute;
+}
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = {
+ .set = gfx_v10_0_set_eop_interrupt_state,
+ .process = gfx_v10_0_eop_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = {
+ .set = gfx_v10_0_set_priv_reg_fault_state,
+ .process = gfx_v10_0_priv_reg_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = {
+ .set = gfx_v10_0_set_priv_inst_fault_state,
+ .process = gfx_v10_0_priv_inst_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {
+ .set = gfx_v10_0_kiq_set_interrupt_state,
+ .process = gfx_v10_0_kiq_irq,
+};
+
+static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+ adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs;
+
+ adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
+ adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;
+
+ adev->gfx.priv_reg_irq.num_types = 1;
+ adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs;
+}
+
+static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;
+ break;
+ default:
+ break;
+ }
+}
+
+static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
+{
+ /* init asic gds info */
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ default:
+ adev->gds.gds_size = 0x10000;
+ adev->gds.gds_compute_max_wave_id = 0x4ff;
+ adev->gds.vgt_gs_max_wave_id = 0x3ff;
+ break;
+ }
+
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
+}
+
+static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+
+ WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
+static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 data, wgp_bitmask;
+ data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+
+ wgp_bitmask =
+ amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+
+ return (~data) & wgp_bitmask;
+}
+
+static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
+{
+ u32 wgp_idx, wgp_active_bitmap;
+ u32 cu_bitmap_per_wgp, cu_active_bitmap;
+
+ wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+ cu_active_bitmap = 0;
+
+ for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
+ /* if there is one WGP enabled, it means 2 CUs will be enabled */
+ cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
+ if (wgp_active_bitmap & (1 << wgp_idx))
+ cu_active_bitmap |= cu_bitmap_per_wgp;
+ }
+
+ return cu_active_bitmap;
+}
+
+static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
+ struct amdgpu_cu_info *cu_info)
+{
+ int i, j, k, counter, active_cu_number = 0;
+ u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
+ unsigned disable_masks[4 * 2];
+
+ if (!adev || !cu_info)
+ return -EINVAL;
+
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ ao_bitmap = 0;
+ counter = 0;
+ gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+ if (i < 4 && j < 2)
+ gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
+ adev, disable_masks[i * 2 + j]);
+ bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev);
+ cu_info->bitmap[i][j] = bitmap;
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
+ if (bitmap & mask) {
+ if (counter < adev->gfx.config.max_cu_per_sh)
+ ao_bitmap |= mask;
+ counter++;
+ }
+ mask <<= 1;
+ }
+ active_cu_number += counter;
+ if (i < 2 && j < 2)
+ ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+ cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
+ }
+ }
+ gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ cu_info->number = active_cu_number;
+ cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
+
+ return 0;
+}
+
+const struct amdgpu_ip_block_version gfx_v10_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GFX,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gfx_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
new file mode 100644
index 000000000000..b442e50324d0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 dvanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFX_V10_0_H__
+#define __GFX_V10_0_H__
+
+extern const struct amdgpu_ip_block_version gfx_v10_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index c0cb244f58cd..789e900905e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -21,6 +21,8 @@
*
*/
#include <linux/firmware.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_gfx.h"
@@ -1812,7 +1814,7 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -3113,7 +3115,7 @@ static int gfx_v6_0_sw_init(void *handle)
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
if (r)
return r;
}
@@ -3348,7 +3350,7 @@ static int gfx_v6_0_set_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v6_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a59e0fdf5a97..341b5024e598 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -20,8 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "amdgpu_gfx.h"
@@ -2080,7 +2082,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
@@ -4460,7 +4462,7 @@ static int gfx_v7_0_sw_init(void *handle)
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
if (r)
return r;
}
@@ -4493,12 +4495,8 @@ static int gfx_v7_0_sw_init(void *handle)
static int gfx_v7_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+ int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -4801,7 +4799,7 @@ static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v7_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
@@ -5070,30 +5068,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 02955e6e9dd9..032e76dbc51f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -20,9 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
@@ -855,7 +859,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -2005,7 +2009,7 @@ static int gfx_v8_0_sw_init(void *handle)
}
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
- AMDGPU_CP_IRQ_GFX_EOP);
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
if (r)
return r;
}
@@ -2042,7 +2046,7 @@ static int gfx_v8_0_sw_init(void *handle)
return r;
/* create MQD for all compute queues as well as KIQ for SRIOV case */
- r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
if (r)
return r;
@@ -2057,19 +2061,15 @@ static int gfx_v8_0_sw_init(void *handle)
static int gfx_v8_0_sw_fini(void *handle)
{
- int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+ int i;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_compute_mqd_sw_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_fini(adev);
@@ -3925,11 +3925,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
int list_size;
unsigned int *register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+ kmemdup(adev->gfx.rlc.register_list_format,
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
if (!register_list_format)
return -ENOMEM;
- memcpy(register_list_format, adev->gfx.rlc.register_list_format,
- adev->gfx.rlc.reg_list_format_size_bytes);
gfx_v8_0_parse_ind_reg_list(register_list_format,
RLC_FormatDirectRegListLength,
@@ -6217,7 +6216,7 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
- pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
@@ -6236,20 +6235,20 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v8_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v8_0_ring_set_pipe_percent(iring, reserve);
}
@@ -6537,7 +6536,7 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
@@ -7010,30 +7009,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asci gds info */
- adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
+ adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
}
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2f18c64d531f..5ba332376710 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -20,9 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
@@ -35,6 +39,7 @@
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"
+#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"
@@ -304,17 +309,20 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0,
- ARRAY_SIZE(golden_settings_gc_9_0));
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_0_vg10,
- ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0,
+ ARRAY_SIZE(golden_settings_gc_9_0));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_0_vg10,
+ ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+ }
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -419,7 +427,7 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -1468,8 +1476,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
/* GDS reserve memory: 64 bytes alignment */
adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
- adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
- adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
+ adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
@@ -1577,7 +1584,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
- (adev->gds.mem.total_size +
+ (adev->gds.gds_size +
adev->gfx.ngg.gds_reserve_size));
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
@@ -1718,7 +1725,7 @@ static int gfx_v9_0_sw_init(void *handle)
ring->use_doorbell = true;
ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
+ &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
if (r)
return r;
}
@@ -1754,7 +1761,7 @@ static int gfx_v9_0_sw_init(void *handle)
return r;
/* create MQD for all compute queues as wel as KIQ for SRIOV case */
- r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
+ r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
if (r)
return r;
@@ -1791,24 +1798,18 @@ static int gfx_v9_0_sw_fini(void *handle)
kfree(ras_if);
}
- amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
- amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
-
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
- amdgpu_gfx_compute_mqd_sw_fini(adev);
+ amdgpu_gfx_mqd_sw_fini(adev);
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
amdgpu_gfx_kiq_fini(adev);
gfx_v9_0_mec_fini(adev);
gfx_v9_0_ngg_fini(adev);
- amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
- &adev->gfx.rlc.clear_state_gpu_addr,
- (void **)&adev->gfx.rlc.cs_ptr);
+ amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
if (adev->asic_type == CHIP_RAVEN) {
amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
&adev->gfx.rlc.cp_table_gpu_addr,
@@ -1844,7 +1845,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
else
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
- WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
}
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
@@ -1912,8 +1913,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
soc15_grbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
- WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
- WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
}
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -1924,7 +1925,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;
- WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+ WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
gfx_v9_0_tiling_mode_table_init(adev);
@@ -1941,17 +1942,17 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
if (i == 0) {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
- WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
- WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
} else {
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
- WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
(adev->gmc.private_aperture_start >> 48));
tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
(adev->gmc.shared_aperture_start >> 48));
- WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
}
}
soc15_grbm_select(adev, 0, 0, 0, 0);
@@ -2015,11 +2016,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
/* csib */
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
adev->gfx.rlc.clear_state_gpu_addr >> 32);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+ WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
adev->gfx.rlc.clear_state_size);
}
@@ -2073,11 +2074,10 @@ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
u32 tmp = 0;
u32 *register_list_format =
- kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
+ kmemdup(adev->gfx.rlc.register_list_format,
+ adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
if (!register_list_format)
return -ENOMEM;
- memcpy(register_list_format, adev->gfx.rlc.register_list_format,
- adev->gfx.rlc.reg_list_format_size_bytes);
/* setup unique_indirect_regs array and indirect_start_offsets array */
unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
@@ -2489,7 +2489,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].sched.ready = false;
}
- WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
udelay(50);
}
@@ -2687,9 +2687,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
int i;
if (enable) {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
} else {
- WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
for (i = 0; i < adev->gfx.num_compute_rings; i++)
adev->gfx.compute_ring[i].sched.ready = false;
@@ -2750,9 +2750,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
tmp &= 0xffffff00;
tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
tmp |= 0x80;
- WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -2970,67 +2970,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
/* disable wptr polling */
WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
mqd->cp_hqd_eop_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
mqd->cp_hqd_eop_base_addr_hi);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
- WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
mqd->cp_hqd_eop_control);
/* enable doorbell? */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
mqd->cp_hqd_dequeue_request);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
mqd->cp_hqd_pq_rptr);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
}
/* set the pointer to the MQD */
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
mqd->cp_mqd_base_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
mqd->cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
- WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
mqd->cp_mqd_control);
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
mqd->cp_hqd_pq_base_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
mqd->cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
mqd->cp_hqd_pq_control);
/* set the wb address whether it's enabled or not */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->cp_hqd_pq_rptr_report_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->cp_hqd_pq_rptr_report_addr_hi);
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
mqd->cp_hqd_pq_wptr_poll_addr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->cp_hqd_pq_wptr_poll_addr_hi);
/* enable the doorbell if requested */
@@ -3041,23 +3041,23 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
(adev->doorbell_index.userqueue_end * 2) << 2);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
mqd->cp_hqd_pq_wptr_lo);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
mqd->cp_hqd_pq_wptr_hi);
/* set the vmid for the queue */
- WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
mqd->cp_hqd_persistent_state);
/* activate the queue */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
mqd->cp_hqd_active);
if (ring->use_doorbell)
@@ -3074,7 +3074,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
/* disable the queue if it's active */
if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
for (j = 0; j < adev->usec_timeout; j++) {
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
@@ -3086,21 +3086,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
DRM_DEBUG("KIQ dequeue request failed.\n");
/* Manual disable if dequeue request times out */
- WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
0);
}
- WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
return 0;
}
@@ -3520,6 +3520,279 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static const u32 vgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x000000f8, 0xbf110800,
+ 0x7e000280, 0x7e020280,
+ 0x7e040280, 0x7e060280,
+ 0x7e080280, 0x7e0a0280,
+ 0x7e0c0280, 0x7e0e0280,
+ 0x80808800, 0xbe803200,
+ 0xbf84fff5, 0xbf9c0000,
+ 0xd28c0001, 0x0001007f,
+ 0xd28d0001, 0x0002027e,
+ 0x10020288, 0xb8810904,
+ 0xb7814000, 0xd1196a01,
+ 0x00000301, 0xbe800087,
+ 0xbefc00c1, 0xd89c4000,
+ 0x00020201, 0xd89cc080,
+ 0x00040401, 0x320202ff,
+ 0x00000800, 0x80808100,
+ 0xbf84fff8, 0x7e020280,
+ 0xbf810000, 0x00000000,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+ 0xb07c0000, 0xbe8000ff,
+ 0x0000005f, 0xbee50080,
+ 0xbe812c65, 0xbe822c65,
+ 0xbe832c65, 0xbe842c65,
+ 0xbe852c65, 0xb77c0005,
+ 0x80808500, 0xbf84fff8,
+ 0xbe800080, 0xbf810000,
+};
+
+static const struct soc15_reg_entry vgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
+};
+
+static const struct soc15_reg_entry sgpr_init_regs[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+};
+
+static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
+ { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
+ { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
+ { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
+ { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
+ { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
+};
+
+static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ int i, r;
+
+ r = amdgpu_ring_alloc(ring, 7);
+ if (r) {
+ DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
+ ring->name, r);
+ return r;
+ }
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
+ amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
+ PACKET3_DMA_DATA_DST_SEL(1) |
+ PACKET3_DMA_DATA_SRC_SEL(2) |
+ PACKET3_DMA_DATA_ENGINE(0)));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
+ adev->gds.gds_size);
+
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout)
+ r = -ETIMEDOUT;
+
+ WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
+
+ return r;
+}
+
+static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ int r, i, j, k;
+ unsigned total_size, vgpr_offset, sgpr_offset;
+ u64 gpu_addr;
+
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ /* bail if the compute ring is not ready */
+ if (!ring->sched.ready)
+ return 0;
+
+ total_size =
+ ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ total_size +=
+ ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+ total_size = ALIGN(total_size, 256);
+ vgpr_offset = total_size;
+ total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
+ sgpr_offset = total_size;
+ total_size += sizeof(sgpr_init_compute_shader);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
+ ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
+
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+ ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+ /* init the ib length to 0 */
+ ib.length_dw = 0;
+
+ /* VGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* SGPR */
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+ }
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+ ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write dispatch packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib.ptr[ib.length_dw++] = 128; /* x */
+ ib.ptr[ib.length_dw++] = 1; /* y */
+ ib.ptr[ib.length_dw++] = 1; /* z */
+ ib.ptr[ib.length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+ /* write CS partial flush packet */
+ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+ ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+ /* shedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r) {
+ DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(f, false);
+ if (r) {
+ DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ goto fail;
+ }
+
+ /* read back registers to clear the counters */
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
+ for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
+ for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
+ gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+ RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+ }
+ }
+ }
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+fail:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+
+ return r;
+}
+
static int gfx_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3561,8 +3834,35 @@ static int gfx_v9_0_ecc_late_init(void *handle)
return 0;
}
- if (*ras_if)
+ r = gfx_v9_0_do_edc_gds_workarounds(adev);
+ if (r)
+ return r;
+
+ /* requires IBs so do in late init after IB pool is initialized */
+ r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+ if (r)
+ return r;
+
+ /* handle resume path. */
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__GFX);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -3571,8 +3871,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__GFX);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -3581,9 +3887,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
if (r)
goto interrupt;
- r = amdgpu_ras_debugfs_create(adev, &fs_info);
- if (r)
- goto debugfs;
+ amdgpu_ras_debugfs_create(adev, &fs_info);
r = amdgpu_ras_sysfs_create(adev, &fs_info);
if (r)
@@ -3598,14 +3902,13 @@ irq:
amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
amdgpu_ras_feature_enable(adev, *ras_if, 0);
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
static int gfx_v9_0_late_init(void *handle)
@@ -4259,7 +4562,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
- pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
@@ -4278,20 +4581,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
- pipe = amdgpu_gfx_queue_to_bit(adev,
- iring->me,
- iring->pipe,
- 0);
+ pipe = amdgpu_gfx_mec_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
@@ -4310,8 +4613,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
- WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
- WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+ WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
@@ -4706,7 +5009,7 @@ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
switch (type) {
- case AMDGPU_CP_IRQ_GFX_EOP:
+ case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
break;
case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
@@ -5047,13 +5350,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
- adev->gds.mem.total_size = 0x10000;
+ adev->gds.gds_size = 0x10000;
break;
case CHIP_RAVEN:
- adev->gds.mem.total_size = 0x1000;
+ adev->gds.gds_size = 0x1000;
break;
default:
- adev->gds.mem.total_size = 0x10000;
+ adev->gds.gds_size = 0x10000;
break;
}
@@ -5077,28 +5380,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
- adev->gds.gws.total_size = 64;
- adev->gds.oa.total_size = 16;
-
- if (adev->gds.mem.total_size == 64 * 1024) {
- adev->gds.mem.gfx_partition_size = 4096;
- adev->gds.mem.cs_partition_size = 4096;
-
- adev->gds.gws.gfx_partition_size = 4;
- adev->gds.gws.cs_partition_size = 4;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 1;
- } else {
- adev->gds.mem.gfx_partition_size = 1024;
- adev->gds.mem.cs_partition_size = 1024;
-
- adev->gds.gws.gfx_partition_size = 16;
- adev->gds.gws.cs_partition_size = 16;
-
- adev->gds.oa.gfx_partition_size = 4;
- adev->gds.oa.cs_partition_size = 4;
- }
+ adev->gds.gws_size = 64;
+ adev->gds.oa_size = 16;
}
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 7bb5359d0bbd..9f0f189fc111 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
/* Program the AGP BAR */
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
@@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
* workaround that increase system aperture high address (add 1)
* to get rid of the VM fault and hardware hang.
*/
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max((adev->gmc.fb_end >> 18) + 0x1,
adev->gmc.agp_end >> 18));
else
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
@@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
MTYPE, MTYPE_UC);/* XXX for emulation. */
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
- WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
@@ -146,12 +146,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
- WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
- WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
@@ -163,12 +163,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
- WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp);
tmp = mmVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
- WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp);
}
static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
@@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
- WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
- WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
@@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
MC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL,
0);
- WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
new file mode 100644
index 000000000000..b7de60a15623
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "gfxhub_v2_0.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "gc/gc_10_1_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE);
+
+ base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24;
+}
+
+static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+{
+ uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(value));
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(value));
+}
+
+static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ gfxhub_v2_0_init_gart_pt_regs(adev);
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+
+ /* Disable AGP. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 18);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
+ + adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+}
+
+
+static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
+ L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
+
+ tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
+
+ tmp = mmGCVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp);
+}
+
+static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp);
+}
+
+static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
+ 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
+ 0);
+
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
+
+}
+
+static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ for (i = 0 ; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ 2 * i, 0xffffffff);
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ 2 * i, 0x1f);
+ }
+}
+
+int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ gfxhub_v2_0_init_gart_aperture_regs(adev);
+ gfxhub_v2_0_init_system_aperture_regs(adev);
+ gfxhub_v2_0_init_tlb_regs(adev);
+ gfxhub_v2_0_init_cache_regs(adev);
+
+ gfxhub_v2_0_enable_system_domain(adev);
+ gfxhub_v2_0_disable_identity_aperture(adev);
+ gfxhub_v2_0_setup_vmid_config(adev);
+ gfxhub_v2_0_program_invalidation(adev);
+
+ return 0;
+}
+
+void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
+}
+
+/**
+ * gfxhub_v2_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+ tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+void gfxhub_v2_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(GC, 0,
+ mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
new file mode 100644
index 000000000000..06807940748b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V2_0_H__
+#define __GFXHUB_V2_0_H__
+
+u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev);
+int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev);
+void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev);
+void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value);
+void gfxhub_v2_0_init(struct amdgpu_device *adev);
+u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
new file mode 100644
index 000000000000..5eeb72fcc123
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -0,0 +1,918 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_atomfirmware.h"
+#include "gmc_v10_0.h"
+
+#include "hdp/hdp_5_0_0_offset.h"
+#include "hdp/hdp_5_0_0_sh_mask.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "dcn/dcn_2_0_0_offset.h"
+#include "dcn/dcn_2_0_0_sh_mask.h"
+#include "oss/osssys_5_0_0_offset.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "navi10_enum.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+
+#include "nbio_v2_3.h"
+
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "athub_v2_0.h"
+/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/
+#define AMDGPU_NUM_OF_VMIDS 8
+
+#if 0
+static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
+{
+ /* TODO add golden setting for hdp */
+};
+#endif
+
+static int
+gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src, unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ struct amdgpu_vmhub *hub;
+ u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i;
+
+ bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ /* MM HUB */
+ hub = &adev->vmhub[AMDGPU_MMHUB];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp &= ~bits[AMDGPU_MMHUB];
+ WREG32(reg, tmp);
+ }
+
+ /* GFX HUB */
+ hub = &adev->vmhub[AMDGPU_GFXHUB];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp &= ~bits[AMDGPU_GFXHUB];
+ WREG32(reg, tmp);
+ }
+ break;
+ case AMDGPU_IRQ_STATE_ENABLE:
+ /* MM HUB */
+ hub = &adev->vmhub[AMDGPU_MMHUB];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp |= bits[AMDGPU_MMHUB];
+ WREG32(reg, tmp);
+ }
+
+ /* GFX HUB */
+ hub = &adev->vmhub[AMDGPU_GFXHUB];
+ for (i = 0; i < 16; i++) {
+ reg = hub->vm_context0_cntl + i;
+ tmp = RREG32(reg);
+ tmp |= bits[AMDGPU_GFXHUB];
+ WREG32(reg, tmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
+ uint32_t status = 0;
+ u64 addr;
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ }
+
+ if (printk_ratelimit()) {
+ dev_err(adev->dev,
+ "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ entry->vmid_src ? "mmhub" : "gfxhub",
+ entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid);
+ dev_err(adev->dev, " at page 0x%016llx from %d\n",
+ addr, entry->client_id);
+ if (!amdgpu_sriov_vf(adev))
+ dev_err(adev->dev,
+ "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+ }
+
+ return 0;
+}
+
+static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
+ .set = gmc_v10_0_vm_fault_interrupt_state,
+ .process = gmc_v10_0_process_interrupt,
+};
+
+static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
+}
+
+static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid*/
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+/*
+ * GART
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+ * VMIDs 1-15 are used for userspace clients and are handled
+ * by the amdgpu vm/hsa code.
+ */
+
+static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ unsigned int vmhub, uint32_t flush_type)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
+ u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+ /* Use register 17 for GART */
+ const unsigned eng = 17;
+ unsigned int i;
+
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+
+ /* Wait for ACK with a delay.*/
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+ tmp &= 1 << vmid;
+ if (tmp)
+ break;
+
+ udelay(1);
+ }
+
+ if (i < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+}
+
+/**
+ * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
+ *
+ * @adev: amdgpu_device pointer
+ * @vmid: vm instance to flush
+ *
+ * Flush the TLB for the requested page table.
+ */
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
+ uint32_t vmid, uint32_t flush_type)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct dma_fence *fence;
+ struct amdgpu_job *job;
+
+ int r;
+
+ /* flush hdp cache */
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0);
+ if (!adev->mman.buffer_funcs_enabled ||
+ !adev->ib_pool_ready ||
+ adev->in_gpu_reset) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+ }
+
+ /* The SDMA on Navi has a bug which can theoretically result in memory
+ * corruption if an invalidation happens at the same time as an VA
+ * translation. Avoid this by doing the invalidation from the SDMA
+ * itself.
+ */
+ r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
+ if (r)
+ goto error_alloc;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ r = amdgpu_job_submit(job, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_submit;
+
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ return;
+
+error_submit:
+ amdgpu_job_free(job);
+
+error_alloc:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
+}
+
+static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
+ unsigned eng = ring->vm_inv_eng;
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+
+ /* wait for the invalidate to complete */
+ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+ 1 << vmid, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
+}
+
+/*
+ * PTE format on NAVI 10:
+ * 63:59 reserved
+ * 58:57 reserved
+ * 56 F
+ * 55 L
+ * 54 reserved
+ * 53:52 SW
+ * 51 T
+ * 50:48 mtype
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 Z
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on NAVI 10:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * 54 P
+ * 53:48 reserved
+ * 47:6 physical base address of PD or PTE
+ * 5:3 reserved
+ * 2 C
+ * 1 system
+ * 0 valid
+ */
+static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev,
+ uint32_t flags)
+{
+ uint64_t pte_flag = 0;
+
+ if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
+ pte_flag |= AMDGPU_PTE_EXECUTABLE;
+ if (flags & AMDGPU_VM_PAGE_READABLE)
+ pte_flag |= AMDGPU_PTE_READABLE;
+ if (flags & AMDGPU_VM_PAGE_WRITEABLE)
+ pte_flag |= AMDGPU_PTE_WRITEABLE;
+
+ switch (flags & AMDGPU_VM_MTYPE_MASK) {
+ case AMDGPU_VM_MTYPE_DEFAULT:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_NC:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ break;
+ case AMDGPU_VM_MTYPE_WC:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
+ break;
+ case AMDGPU_VM_MTYPE_CC:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
+ break;
+ case AMDGPU_VM_MTYPE_UC:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
+ break;
+ default:
+ pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
+ break;
+ }
+
+ if (flags & AMDGPU_VM_PAGE_PRT)
+ pte_flag |= AMDGPU_PTE_PRT;
+
+ return pte_flag;
+}
+
+static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
+ *addr = adev->vm_manager.vram_base_offset + *addr -
+ adev->gmc.vram_start;
+ BUG_ON(*addr & 0xFFFF00000000003FULL);
+
+ if (!adev->gmc.translate_further)
+ return;
+
+ if (level == AMDGPU_VM_PDB1) {
+ /* Set the block fragment size */
+ if (!(*flags & AMDGPU_PDE_PTE))
+ *flags |= AMDGPU_PDE_BFS(0x9);
+
+ } else if (level == AMDGPU_VM_PDB0) {
+ if (*flags & AMDGPU_PDE_PTE)
+ *flags &= ~AMDGPU_PDE_PTE;
+ else
+ *flags |= AMDGPU_PTE_TF;
+ }
+}
+
+static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
+ .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
+ .get_vm_pde = gmc_v10_0_get_vm_pde
+};
+
+static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
+{
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
+}
+
+static int gmc_v10_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v10_0_set_gmc_funcs(adev);
+ gmc_v10_0_set_irq_funcs(adev);
+
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start = 0x1000000000000000ULL;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
+
+ return 0;
+}
+
+static int gmc_v10_0_late_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
+ unsigned i;
+
+ for(i = 0; i < adev->num_rings; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ unsigned vmhub = ring->funcs->vmhub;
+
+ ring->vm_inv_eng = vm_inv_eng[vmhub]++;
+ dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n",
+ ring->idx, ring->name, ring->vm_inv_eng,
+ ring->funcs->vmhub);
+ }
+
+ /* Engine 17 is used for GART flushes */
+ for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
+ BUG_ON(vm_inv_eng[i] > 17);
+
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
+}
+
+static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc)
+{
+ u64 base = 0;
+
+ if (!amdgpu_sriov_vf(adev))
+ base = gfxhub_v2_0_get_fb_location(adev);
+
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+
+ /* base offset of vram pages */
+ adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev);
+}
+
+/**
+ * gmc_v10_0_mc_init - initialize the memory controller driver params
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Look up the amount of vram, vram width, and decide how to place
+ * vram and gart within the GPU's physical address space.
+ * Returns 0 for success.
+ */
+static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
+{
+ int chansize, numchan;
+
+ if (!amdgpu_emu_mode)
+ adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
+ else {
+ /* hard code vram_width for emulation */
+ chansize = 128;
+ numchan = 1;
+ adev->gmc.vram_width = numchan * chansize;
+ }
+
+ /* Could aper size report 0 ? */
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+
+ /* size in MB on si */
+ adev->gmc.mc_vram_size =
+ adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+
+ /* In case the PCI BAR is larger than the actual amount of vram */
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
+
+ /* set the gart size */
+ if (amdgpu_gart_size == -1) {
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ default:
+ adev->gmc.gart_size = 512ULL << 20;
+ break;
+ }
+ } else
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+
+ gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
+
+ return 0;
+}
+
+static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->gart.bo) {
+ WARN(1, "NAVI10 PCIE GART already initialized\n");
+ return 0;
+ }
+
+ /* Initialize common gart structure */
+ r = amdgpu_gart_init(adev);
+ if (r)
+ return r;
+
+ adev->gart.table_size = adev->gart.num_gpu_pages * 8;
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
+ AMDGPU_PTE_EXECUTABLE;
+
+ return amdgpu_gart_table_vram_alloc(adev);
+}
+
+static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+ u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+ unsigned size;
+
+ if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+ size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+ } else {
+ u32 viewport;
+ u32 pitch;
+
+ viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+ pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
+ size = (REG_GET_FIELD(viewport,
+ HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+ REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
+ 4);
+ }
+ /* return 0 if the pre-OS buffer uses up most of vram */
+ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) {
+ DRM_ERROR("Warning: pre-OS buffer uses most of vram, \
+ be aware of gart table overwrite\n");
+ return 0;
+ }
+
+ return size;
+}
+
+
+
+static int gmc_v10_0_sw_init(void *handle)
+{
+ int r;
+ int dma_bits;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gfxhub_v2_0_init(adev);
+ mmhub_v2_0_init(adev);
+
+ spin_lock_init(&adev->gmc.invalidate_lock);
+
+ adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ /*
+ * To fulfill 4-level page support,
+ * vm size is 256TB (48bit), maximum size of Navi10,
+ * block size 512 (9bit)
+ */
+ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ break;
+ default:
+ break;
+ }
+
+ /* This interrupt is VMC page fault.*/
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
+ VMC_1_0__SRCID__VM_FAULT,
+ &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
+ UTCL2_1_0__SRCID__FAULT,
+ &adev->gmc.vm_fault);
+ if (r)
+ return r;
+
+ /*
+ * Set the internal MC address mask This is the max address of the GPU's
+ * internal address space.
+ */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+
+ /*
+ * Reserve 8M stolen memory for navi10 like vega10
+ * TODO: will check if it's really needed on asic.
+ */
+ if (amdgpu_emu_mode == 1)
+ adev->gmc.stolen_size = 0;
+ else
+ adev->gmc.stolen_size = 9 * 1024 *1024;
+
+ /*
+ * Set DMA mask + need_dma32 flags.
+ * PCIE - can handle 44-bits.
+ * IGP - can handle 44-bits
+ * PCI - dma32 for legacy pci gart, 44 bits on navi10
+ */
+ adev->need_dma32 = false;
+ dma_bits = adev->need_dma32 ? 32 : 44;
+
+ r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ if (r) {
+ adev->need_dma32 = true;
+ dma_bits = 32;
+ printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
+ }
+
+ r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
+ if (r) {
+ pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
+ printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
+ }
+
+ r = gmc_v10_0_mc_init(adev);
+ if (r)
+ return r;
+
+ adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev);
+
+ /* Memory manager */
+ r = amdgpu_bo_init(adev);
+ if (r)
+ return r;
+
+ r = gmc_v10_0_gart_init(adev);
+ if (r)
+ return r;
+
+ /*
+ * number of VMs
+ * VMID 0 is reserved for System
+ * amdgpu graphics/compute will use VMIDs 1-7
+ * amdkfd will use VMIDs 8-15
+ */
+ adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS;
+
+ amdgpu_vm_manager_init(adev);
+
+ return 0;
+}
+
+/**
+ * gmc_v8_0_gart_fini - vm fini callback
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Tears down the driver GART/VM setup (CIK).
+ */
+static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
+{
+ amdgpu_gart_table_vram_free(adev);
+ amdgpu_gart_fini(adev);
+}
+
+static int gmc_v10_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_vm_manager_fini(adev);
+ gmc_v10_0_gart_fini(adev);
+ amdgpu_gem_force_release(adev);
+ amdgpu_bo_fini(adev);
+
+ return 0;
+}
+
+static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * gmc_v10_0_gart_enable - gart enable
+ *
+ * @adev: amdgpu_device pointer
+ */
+static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
+{
+ int r;
+ bool value;
+ u32 tmp;
+
+ if (adev->gart.bo == NULL) {
+ dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
+ return -EINVAL;
+ }
+
+ r = amdgpu_gart_table_vram_pin(adev);
+ if (r)
+ return r;
+
+ r = gfxhub_v2_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ r = mmhub_v2_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
+ tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
+ WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
+
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
+ WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
+
+ /* Flush HDP after it is initialized */
+ adev->nbio_funcs->hdp_flush(adev, NULL);
+
+ value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
+ false : true;
+
+ gfxhub_v2_0_set_fault_enable_default(adev, value);
+ mmhub_v2_0_set_fault_enable_default(adev, value);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
+
+ DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+ (unsigned)(adev->gmc.gart_size >> 20),
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
+
+ adev->gart.ready = true;
+
+ return 0;
+}
+
+static int gmc_v10_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* The sequence of these two function calls matters.*/
+ gmc_v10_0_init_golden_registers(adev);
+
+ r = gmc_v10_0_gart_enable(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+/**
+ * gmc_v10_0_gart_disable - gart disable
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * This disables all VM page table.
+ */
+static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
+{
+ gfxhub_v2_0_gart_disable(adev);
+ mmhub_v2_0_gart_disable(adev);
+ amdgpu_gart_table_vram_unpin(adev);
+}
+
+static int gmc_v10_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* full access mode, so don't touch any GMC register */
+ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
+ gmc_v10_0_gart_disable(adev);
+
+ return 0;
+}
+
+static int gmc_v10_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ gmc_v10_0_hw_fini(adev);
+
+ return 0;
+}
+
+static int gmc_v10_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = gmc_v10_0_hw_init(adev);
+ if (r)
+ return r;
+
+ amdgpu_vmid_reset_all(adev);
+
+ return 0;
+}
+
+static bool gmc_v10_0_is_idle(void *handle)
+{
+ /* MC is always ready in GMC v10.*/
+ return true;
+}
+
+static int gmc_v10_0_wait_for_idle(void *handle)
+{
+ /* There is no need to wait for MC idle in GMC v10.*/
+ return 0;
+}
+
+static int gmc_v10_0_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int gmc_v10_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = mmhub_v2_0_set_clockgating(adev, state);
+ if (r)
+ return r;
+
+ return athub_v2_0_set_clockgating(adev, state);
+}
+
+static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ mmhub_v2_0_get_clockgating(adev, flags);
+
+ athub_v2_0_get_clockgating(adev, flags);
+}
+
+static int gmc_v10_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
+ .name = "gmc_v10_0",
+ .early_init = gmc_v10_0_early_init,
+ .late_init = gmc_v10_0_late_init,
+ .sw_init = gmc_v10_0_sw_init,
+ .sw_fini = gmc_v10_0_sw_fini,
+ .hw_init = gmc_v10_0_hw_init,
+ .hw_fini = gmc_v10_0_hw_fini,
+ .suspend = gmc_v10_0_suspend,
+ .resume = gmc_v10_0_resume,
+ .is_idle = gmc_v10_0_is_idle,
+ .wait_for_idle = gmc_v10_0_wait_for_idle,
+ .soft_reset = gmc_v10_0_soft_reset,
+ .set_clockgating_state = gmc_v10_0_set_clockgating_state,
+ .set_powergating_state = gmc_v10_0_set_powergating_state,
+ .get_clockgating_state = gmc_v10_0_get_clockgating_state,
+};
+
+const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_GMC,
+ .major = 10,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &gmc_v10_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h
new file mode 100644
index 000000000000..7daa53d8996c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GMC_V10_0_H__
+#define __GMC_V10_0_H__
+
+extern const struct amd_ip_funcs gmc_v10_0_ip_funcs;
+extern const struct amdgpu_ip_block_version gmc_v10_0_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b06d876da2d9..ca8dbe91cc8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -20,8 +20,11 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "gmc_v6_0.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 75aa3332aee2..57f80065d57a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -20,8 +20,11 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8a3b5e6fc6c9..9238280d1ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -20,8 +20,11 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "gmc_v8_0.h"
@@ -289,7 +292,7 @@ out:
*
* @adev: amdgpu_device pointer
*
- * Load the GDDR MC ucode into the hw (CIK).
+ * Load the GDDR MC ucode into the hw (VI).
* Returns 0 on success, error on failure.
*/
static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
@@ -443,7 +446,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
*
* Set the location of vram, gart, and AGP in the GPU's
- * physical address space (CIK).
+ * physical address space (VI).
*/
static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
{
@@ -515,7 +518,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
*
* Look up the amount of vram, vram width, and decide how to place
- * vram and gart within the GPU's physical address space (CIK).
+ * vram and gart within the GPU's physical address space (VI).
* Returns 0 for success.
*/
static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
@@ -630,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
- * Flush the TLB for the requested page table (CIK).
+ * Flush the TLB for the requested page table (VI).
*/
static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid, uint32_t flush_type)
@@ -800,7 +803,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
* This sets up the TLBs, programs the page tables for VMID0,
* sets up the hw for VMIDs 1-15 which are allocated on
* demand, and sets up the global locations for the LDS, GDS,
- * and GPUVM for FSA64 clients (CIK).
+ * and GPUVM for FSA64 clients (VI).
* Returns 0 for success, errors for failure.
*/
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
@@ -948,7 +951,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * This disables all VM page table (CIK).
+ * This disables all VM page table (VI).
*/
static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
{
@@ -978,7 +981,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
* @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
* @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
*
- * Print human readable fault information (CIK).
+ * Print human readable fault information (VI).
*/
static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
u32 addr, u32 mc_client, unsigned pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 72837b8c7031..73f3b79ab131 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -20,8 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
#include <linux/firmware.h>
+#include <linux/pci.h>
+
#include <drm/drm_cache.h>
+
#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
@@ -531,22 +535,22 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
switch (flags & AMDGPU_VM_MTYPE_MASK) {
case AMDGPU_VM_MTYPE_DEFAULT:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
break;
case AMDGPU_VM_MTYPE_NC:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
break;
case AMDGPU_VM_MTYPE_WC:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
break;
case AMDGPU_VM_MTYPE_CC:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
break;
case AMDGPU_VM_MTYPE_UC:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
break;
default:
- pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC);
+ pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
break;
}
@@ -686,8 +690,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
return 0;
}
/* handle resume path. */
- if (*ras_if)
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__UMC);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -696,8 +717,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__UMC);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -706,9 +733,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
if (r)
goto interrupt;
- r = amdgpu_ras_debugfs_create(adev, &fs_info);
- if (r)
- goto debugfs;
+ amdgpu_ras_debugfs_create(adev, &fs_info);
r = amdgpu_ras_sysfs_create(adev, &fs_info);
if (r)
@@ -723,14 +748,13 @@ irq:
amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
amdgpu_ras_feature_enable(adev, *ras_if, 0);
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
@@ -892,7 +916,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
- adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) |
+ adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);
}
@@ -1099,6 +1123,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ if (amdgpu_virt_support_skip_setting(adev))
+ break;
+ /* fall through */
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
golden_settings_mmhub_1_0_0,
@@ -1163,6 +1190,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+ WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
+
/* After HDP is initialized, flush HDP.*/
adev->nbio_funcs->hdp_flush(adev, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index b1626e1d2f5d..a13dd9a51149 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f2e6b148ccad..4b3faaccecb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -21,7 +21,6 @@
*
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
index b82e33c01571..2d9ab6b8be66 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
@@ -22,7 +22,6 @@
* Authors: Alex Deucher
*/
-#include <drm/drmP.h>
#include "amdgpu.h"
#include "cikd.h"
#include "kv_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
new file mode 100644
index 000000000000..29fab7984855
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "soc15_common.h"
+#include "nv.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/navi10_mes.bin");
+
+static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
+ struct mes_add_queue_input *input)
+{
+ return 0;
+}
+
+static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
+ struct mes_remove_queue_input *input)
+{
+ return 0;
+}
+
+static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
+ struct mes_suspend_gang_input *input)
+{
+ return 0;
+}
+
+static int mes_v10_1_resume_gang(struct amdgpu_mes *mes,
+ struct mes_resume_gang_input *input)
+{
+ return 0;
+}
+
+static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
+ .add_hw_queue = mes_v10_1_add_hw_queue,
+ .remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .suspend_gang = mes_v10_1_suspend_gang,
+ .resume_gang = mes_v10_1_resume_gang,
+};
+
+static int mes_v10_1_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ chip_name = "navi10";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name);
+ err = request_firmware(&adev->mes.fw, fw_name, adev->dev);
+ if (err)
+ return err;
+
+ err = amdgpu_ucode_validate(adev->mes.fw);
+ if (err) {
+ release_firmware(adev->mes.fw);
+ adev->mes.fw = NULL;
+ return err;
+ }
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data;
+ adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version);
+ adev->mes.ucode_fw_version =
+ le32_to_cpu(mes_hdr->mes_ucode_data_version);
+ adev->mes.uc_start_addr =
+ le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
+ adev->mes.data_start_addr =
+ le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
+ ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
+
+ return 0;
+}
+
+static void mes_v10_1_free_microcode(struct amdgpu_device *adev)
+{
+ release_firmware(adev->mes.fw);
+ adev->mes.fw = NULL;
+}
+
+static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.ucode_fw_obj,
+ &adev->mes.ucode_fw_gpu_addr,
+ (void **)&adev->mes.ucode_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.ucode_fw_obj);
+ amdgpu_bo_unreserve(adev->mes.ucode_fw_obj);
+
+ return 0;
+}
+
+static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev)
+{
+ int r;
+ const struct mes_firmware_header_v1_0 *mes_hdr;
+ const __le32 *fw_data;
+ unsigned fw_size;
+
+ mes_hdr = (const struct mes_firmware_header_v1_0 *)
+ adev->mes.fw->data;
+
+ fw_data = (const __le32 *)(adev->mes.fw->data +
+ le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
+ fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
+
+ r = amdgpu_bo_create_reserved(adev, fw_size,
+ 64 * 1024, AMDGPU_GEM_DOMAIN_GTT,
+ &adev->mes.data_fw_obj,
+ &adev->mes.data_fw_gpu_addr,
+ (void **)&adev->mes.data_fw_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r);
+ return r;
+ }
+
+ memcpy(adev->mes.data_fw_ptr, fw_data, fw_size);
+
+ amdgpu_bo_kunmap(adev->mes.data_fw_obj);
+ amdgpu_bo_unreserve(adev->mes.data_fw_obj);
+
+ return 0;
+}
+
+static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev)
+{
+ amdgpu_bo_free_kernel(&adev->mes.data_fw_obj,
+ &adev->mes.data_fw_gpu_addr,
+ (void **)&adev->mes.data_fw_ptr);
+
+ amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj,
+ &adev->mes.ucode_fw_gpu_addr,
+ (void **)&adev->mes.ucode_fw_ptr);
+}
+
+static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t data = 0;
+
+ if (enable) {
+ data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+
+ /* set ucode start address */
+ WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
+ (uint32_t)(adev->mes.uc_start_addr) >> 2);
+
+ /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL,
+ BYPASS_UNCACHED, 0);
+ WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data);
+
+ /* unhalt MES and activate pipe0 */
+ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+ } else {
+ data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
+ data = REG_SET_FIELD(data, CP_MES_CNTL,
+ MES_INVALIDATE_ICACHE, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1);
+ data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data);
+ }
+}
+
+/* This function is for backdoor MES firmware */
+static int mes_v10_1_load_microcode(struct amdgpu_device *adev)
+{
+ int r;
+ uint32_t data;
+
+ if (!adev->mes.fw)
+ return -EINVAL;
+
+ r = mes_v10_1_allocate_ucode_buffer(adev);
+ if (r)
+ return r;
+
+ r = mes_v10_1_allocate_ucode_data_buffer(adev);
+ if (r) {
+ mes_v10_1_free_ucode_buffers(adev);
+ return r;
+ }
+
+ mes_v10_1_enable(adev, false);
+
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0);
+
+ mutex_lock(&adev->srbm_mutex);
+ /* me=3, pipe=0, queue=0 */
+ nv_grbm_select(adev, 3, 0, 0, 0);
+
+ /* set ucode start address */
+ WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START,
+ (uint32_t)(adev->mes.uc_start_addr) >> 2);
+
+ /* set ucode fimrware address */
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO,
+ lower_32_bits(adev->mes.ucode_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI,
+ upper_32_bits(adev->mes.ucode_fw_gpu_addr));
+
+ /* set ucode instruction cache boundary to 2M-1 */
+ WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF);
+
+ /* set ucode data firmware address */
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO,
+ lower_32_bits(adev->mes.data_fw_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI,
+ upper_32_bits(adev->mes.data_fw_gpu_addr));
+
+ /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
+ WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF);
+
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data);
+
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ return 0;
+}
+
+static int mes_v10_1_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = mes_v10_1_init_microcode(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int mes_v10_1_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ mes_v10_1_free_microcode(adev);
+
+ return 0;
+}
+
+static int mes_v10_1_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = mes_v10_1_load_microcode(adev);
+ if (r) {
+ DRM_ERROR("failed to MES fw, r=%d\n", r);
+ return r;
+ }
+ } else {
+ DRM_ERROR("only support direct fw loading on MES\n");
+ return -EINVAL;
+ }
+
+ mes_v10_1_enable(adev, true);
+
+ return 0;
+}
+
+static int mes_v10_1_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ mes_v10_1_enable(adev, false);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
+ mes_v10_1_free_ucode_buffers(adev);
+
+ return 0;
+}
+
+static int mes_v10_1_suspend(void *handle)
+{
+ return 0;
+}
+
+static int mes_v10_1_resume(void *handle)
+{
+ return 0;
+}
+
+static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
+ .name = "mes_v10_1",
+ .sw_init = mes_v10_1_sw_init,
+ .sw_fini = mes_v10_1_sw_fini,
+ .hw_init = mes_v10_1_hw_init,
+ .hw_fini = mes_v10_1_hw_fini,
+ .suspend = mes_v10_1_suspend,
+ .resume = mes_v10_1_resume,
+};
+
+const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_MES,
+ .major = 10,
+ .minor = 1,
+ .rev = 0,
+ .funcs = &mes_v10_1_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
new file mode 100644
index 000000000000..9afd6ddb01e9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __MES_V10_1_H__
+#define __MES_V10_1_H__
+
+extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 41a9a5779623..05d1d448c8f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
@@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0XFFFFFFFF);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
0);
WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
- /* Setup L2 cache */
- tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
- tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
- WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+ }
}
/**
@@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
+
+ if (amdgpu_virt_support_skip_setting(adev))
+ return;
+
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
new file mode 100644
index 000000000000..37a1a318ae63
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v2_0.h"
+
+#include "mmhub/mmhub_2_0_0_offset.h"
+#include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "mmhub/mmhub_2_0_0_default.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev)
+{
+ uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ lower_32_bits(value));
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ upper_32_bits(value));
+}
+
+static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ mmhub_v2_0_init_gart_pt_regs(adev);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Disable AGP. */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF);
+
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 18);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, Refer to closed source code.*/
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
+
+ tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
+
+ tmp = mmMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp);
+}
+
+static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+}
+
+static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ 2 * i, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ 2 * i, 0x1f);
+ }
+}
+
+int mmhub_v2_0_gart_enable(struct amdgpu_device *adev)
+{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
+ * VF copy registers so vbios post doesn't program them, for
+ * SRIOV driver need to program them
+ */
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE,
+ adev->gmc.vram_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP,
+ adev->gmc.vram_end >> 24);
+ }
+
+ /* GART Enable. */
+ mmhub_v2_0_init_gart_aperture_regs(adev);
+ mmhub_v2_0_init_system_aperture_regs(adev);
+ mmhub_v2_0_init_tlb_regs(adev);
+ mmhub_v2_0_init_cache_regs(adev);
+
+ mmhub_v2_0_enable_system_domain(adev);
+ mmhub_v2_0_disable_identity_aperture(adev);
+ mmhub_v2_0_setup_vmid_config(adev);
+ mmhub_v2_0_program_invalidation(adev);
+
+ return 0;
+}
+
+void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)
+{
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v2_0_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+ u32 tmp;
+ tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+void mmhub_v2_0_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL);
+
+}
+
+static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data, def1, data1;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+
+ def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+
+ } else {
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+
+ if (def1 != data1)
+ WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1);
+}
+
+static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
+}
+
+int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ mmhub_v2_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ mmhub_v2_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+{
+ int data, data1;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
+
+ data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) &&
+ !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK |
+ DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
new file mode 100644
index 000000000000..db16f3ece218
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V2_0_H__
+#define __MMHUB_V2_0_H__
+
+int mmhub_v2_0_gart_enable(struct amdgpu_device *adev);
+void mmhub_v2_0_gart_disable(struct amdgpu_device *adev);
+void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value);
+void mmhub_v2_0_init(struct amdgpu_device *adev);
+int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state);
+void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2471e7cf75ea..235548c0b41f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -26,6 +26,7 @@
#include "nbio/nbio_6_1_sh_mask.h"
#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
+#include "mp/mp_9_0_offset.h"
#include "soc15.h"
#include "vega10_ih.h"
#include "soc15_common.h"
@@ -343,7 +344,7 @@ flr_done:
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
- && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
+ && adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
amdgpu_device_gpu_recover(adev, NULL);
}
@@ -448,6 +449,20 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
+{
+ adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
+
+ /* Enable L1 security reg access mode by defaul, as non-security VF
+ * will no longer be supported.
+ */
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
+
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
+
+ adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
@@ -456,4 +471,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.trans_msg = xgpu_ai_mailbox_trans_msg,
.get_pp_clk = xgpu_ai_get_pp_clk,
.force_dpm_level = xgpu_ai_force_dpm_level,
+ .init_reg_access_mode = xgpu_ai_init_reg_access_mode,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
new file mode 100644
index 000000000000..e963746be11c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_ih.h"
+
+#include "oss/osssys_5_0_0_offset.h"
+#include "oss/osssys_5_0_0_sh_mask.h"
+
+#include "soc15_common.h"
+#include "navi10_ih.h"
+
+
+static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
+
+/**
+ * navi10_ih_enable_interrupts - Enable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enable the interrupt ring buffer (NAVI10).
+ */
+static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ adev->irq.ih.enabled = true;
+}
+
+/**
+ * navi10_ih_disable_interrupts - Disable the interrupt ring buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable the interrupt ring buffer (NAVI10).
+ */
+static void navi10_ih_disable_interrupts(struct amdgpu_device *adev)
+{
+ u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
+ adev->irq.ih.enabled = false;
+ adev->irq.ih.rptr = 0;
+}
+
+static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl)
+{
+ int rb_bufsz = order_base_2(ih->ring_size / 4);
+
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ MC_SPACE, ih->use_bus_addr ? 1 : 4);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_CLEAR, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_OVERFLOW_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register
+ * value is written to memory
+ */
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
+ WPTR_WRITEBACK_ENABLE, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0);
+
+ return ih_rb_cntl;
+}
+
+/**
+ * navi10_ih_irq_init - init and enable the interrupt ring
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate a ring buffer for the interrupt controller,
+ * enable the RLC, disable interrupts, enable the IH
+ * ring buffer and enable it (NAVI).
+ * Called at device load and reume.
+ * Returns 0 for success, errors for failure.
+ */
+static int navi10_ih_irq_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ih_ring *ih = &adev->irq.ih;
+ int ret = 0;
+ u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken;
+ u32 tmp;
+
+ /* disable irqs */
+ navi10_ih_disable_interrupts(adev);
+
+ adev->nbio_funcs->ih_control(adev);
+
+ /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);
+
+ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL);
+ ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl);
+ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
+ !!adev->irq.msi_enabled);
+
+ if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) {
+ if (ih->use_bus_addr) {
+ ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
+ ih_chicken = REG_SET_FIELD(ih_chicken,
+ IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken);
+ }
+ }
+
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+
+ /* set the writeback address whether it's enabled or not */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
+ lower_32_bits(ih->wptr_addr));
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI,
+ upper_32_bits(ih->wptr_addr) & 0xFFFF);
+
+ /* set rptr, wptr to 0 */
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
+
+ ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR);
+ if (ih->use_doorbell) {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, OFFSET,
+ ih->doorbell_index);
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, ENABLE, 1);
+ } else {
+ ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr,
+ IH_DOORBELL_RPTR, ENABLE, 0);
+ }
+ WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr);
+
+ adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell,
+ ih->doorbell_index);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
+ CLIENT18_IS_STORM_CLIENT, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL);
+ tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
+ WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp);
+
+ pci_set_master(adev->pdev);
+
+ /* enable interrupts */
+ navi10_ih_enable_interrupts(adev);
+
+ return ret;
+}
+
+/**
+ * navi10_ih_irq_disable - disable interrupts
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Disable interrupts on the hw (NAVI10).
+ */
+static void navi10_ih_irq_disable(struct amdgpu_device *adev)
+{
+ navi10_ih_disable_interrupts(adev);
+
+ /* Wait and acknowledge irq */
+ mdelay(1);
+}
+
+/**
+ * navi10_ih_get_wptr - get the IH ring buffer wptr
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Get the IH ring buffer wptr from either the register
+ * or the writeback memory buffer (NAVI10). Also check for
+ * ring buffer overflow and deal with it.
+ * Returns the value of the wptr.
+ */
+static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 wptr, reg, tmp;
+
+ wptr = le32_to_cpu(*ih->wptr_cpu);
+
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR);
+ wptr = RREG32_NO_KIQ(reg);
+ if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+ goto out;
+ wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
+
+ /* When a ring buffer overflow happen start parsing interrupt
+ * from the last not overwritten vector (wptr + 32). Hopefully
+ * this should allow us to catch up.
+ */
+ tmp = (wptr + 32) & ih->ptr_mask;
+ dev_warn(adev->dev, "IH ring buffer overflow "
+ "(0x%08X, 0x%08X, 0x%08X)\n",
+ wptr, ih->rptr, tmp);
+ ih->rptr = tmp;
+
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL);
+ tmp = RREG32_NO_KIQ(reg);
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
+ WREG32_NO_KIQ(reg, tmp);
+out:
+ return (wptr & ih->ptr_mask);
+}
+
+/**
+ * navi10_ih_decode_iv - decode an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Decodes the interrupt vector at the current rptr
+ * position and also advance the position.
+ */
+static void navi10_ih_decode_iv(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih,
+ struct amdgpu_iv_entry *entry)
+{
+ /* wptr/rptr are in bytes! */
+ u32 ring_index = ih->rptr >> 2;
+ uint32_t dw[8];
+
+ dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
+ dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
+ dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
+ dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
+ dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
+ dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
+ dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
+ dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
+
+ entry->client_id = dw[0] & 0xff;
+ entry->src_id = (dw[0] >> 8) & 0xff;
+ entry->ring_id = (dw[0] >> 16) & 0xff;
+ entry->vmid = (dw[0] >> 24) & 0xf;
+ entry->vmid_src = (dw[0] >> 31);
+ entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
+ entry->timestamp_src = dw[2] >> 31;
+ entry->pasid = dw[3] & 0xffff;
+ entry->pasid_src = dw[3] >> 31;
+ entry->src_data[0] = dw[4];
+ entry->src_data[1] = dw[5];
+ entry->src_data[2] = dw[6];
+ entry->src_data[3] = dw[7];
+
+ /* wptr/rptr are in bytes! */
+ ih->rptr += 32;
+}
+
+/**
+ * navi10_ih_set_rptr - set the IH ring buffer rptr
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set the IH ring buffer rptr.
+ */
+static void navi10_ih_set_rptr(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ if (ih->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ *ih->rptr_cpu = ih->rptr;
+ WDOORBELL32(ih->doorbell_index, ih->rptr);
+ } else
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
+}
+
+static int navi10_ih_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_set_interrupt_funcs(adev);
+ return 0;
+}
+
+static int navi10_ih_sw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool use_bus_addr;
+
+ /* use gpu virtual address for ih ring
+ * until ih_checken is programmed to allow
+ * use bus address for ih ring by psp bl */
+ use_bus_addr =
+ (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih.use_doorbell = true;
+ adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
+
+ r = amdgpu_irq_init(adev);
+
+ return r;
+}
+
+static int navi10_ih_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
+
+ return 0;
+}
+
+static int navi10_ih_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = navi10_ih_irq_init(adev);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int navi10_ih_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_irq_disable(adev);
+
+ return 0;
+}
+
+static int navi10_ih_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return navi10_ih_hw_fini(adev);
+}
+
+static int navi10_ih_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return navi10_ih_hw_init(adev);
+}
+
+static bool navi10_ih_is_idle(void *handle)
+{
+ /* todo */
+ return true;
+}
+
+static int navi10_ih_wait_for_idle(void *handle)
+{
+ /* todo */
+ return -ETIMEDOUT;
+}
+
+static int navi10_ih_soft_reset(void *handle)
+{
+ /* todo */
+ return 0;
+}
+
+static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def, field_val;
+
+ if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) {
+ def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL);
+ field_val = enable ? 0 : 1;
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DBUS_MUX_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ DYN_CLK_SOFT_OVERRIDE, field_val);
+ data = REG_SET_FIELD(data, IH_CLK_CTRL,
+ REG_CLK_SOFT_OVERRIDE, field_val);
+ if (def != data)
+ WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data);
+ }
+
+ return;
+}
+
+static int navi10_ih_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ navi10_ih_update_clockgating_state(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ return 0;
+}
+
+static int navi10_ih_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void navi10_ih_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL))
+ *flags |= AMD_CG_SUPPORT_IH_CG;
+
+ return;
+}
+
+static const struct amd_ip_funcs navi10_ih_ip_funcs = {
+ .name = "navi10_ih",
+ .early_init = navi10_ih_early_init,
+ .late_init = NULL,
+ .sw_init = navi10_ih_sw_init,
+ .sw_fini = navi10_ih_sw_fini,
+ .hw_init = navi10_ih_hw_init,
+ .hw_fini = navi10_ih_hw_fini,
+ .suspend = navi10_ih_suspend,
+ .resume = navi10_ih_resume,
+ .is_idle = navi10_ih_is_idle,
+ .wait_for_idle = navi10_ih_wait_for_idle,
+ .soft_reset = navi10_ih_soft_reset,
+ .set_clockgating_state = navi10_ih_set_clockgating_state,
+ .set_powergating_state = navi10_ih_set_powergating_state,
+ .get_clockgating_state = navi10_ih_get_clockgating_state,
+};
+
+static const struct amdgpu_ih_funcs navi10_ih_funcs = {
+ .get_wptr = navi10_ih_get_wptr,
+ .decode_iv = navi10_ih_decode_iv,
+ .set_rptr = navi10_ih_set_rptr
+};
+
+static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
+{
+ if (adev->irq.ih_funcs == NULL)
+ adev->irq.ih_funcs = &navi10_ih_funcs;
+}
+
+const struct amdgpu_ip_block_version navi10_ih_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_IH,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &navi10_ih_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.h b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h
new file mode 100644
index 000000000000..140fbdaaed17
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NAVI10_IH_H__
+#define __NAVI10_IH_H__
+
+extern const struct amdgpu_ip_block_version navi10_ih_ip_block;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
new file mode 100644
index 000000000000..55014ce8670a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "navi10_ip_offset.h"
+
+int navi10_reg_base_init(struct amdgpu_device *adev)
+{
+ int r, i;
+
+ if (amdgpu_discovery) {
+ r = amdgpu_discovery_reg_base_init(adev);
+ if (r) {
+ DRM_WARN("failed to init reg base from ip discovery table, "
+ "fallback to legacy init method\n");
+ goto legacy_init;
+ }
+
+ return 0;
+ }
+
+legacy_init:
+ for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+ adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+ adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+ adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+ adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+ adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
+ adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+ adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i]));
+ adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+ adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+ adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+ }
+
+ return 0;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
new file mode 100644
index 000000000000..074a9a09c0a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h
@@ -0,0 +1,4806 @@
+/*
+ * Copyright (C) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NAVI10_SDMA_PKT_OPEN_H_
+#define __NAVI10_SDMA_PKT_OPEN_H_
+
+#define SDMA_OP_NOP 0
+#define SDMA_OP_COPY 1
+#define SDMA_OP_WRITE 2
+#define SDMA_OP_INDIRECT 4
+#define SDMA_OP_FENCE 5
+#define SDMA_OP_TRAP 6
+#define SDMA_OP_SEM 7
+#define SDMA_OP_POLL_REGMEM 8
+#define SDMA_OP_COND_EXE 9
+#define SDMA_OP_ATOMIC 10
+#define SDMA_OP_CONST_FILL 11
+#define SDMA_OP_PTEPDE 12
+#define SDMA_OP_TIMESTAMP 13
+#define SDMA_OP_SRBM_WRITE 14
+#define SDMA_OP_PRE_EXE 15
+#define SDMA_OP_GPUVM_INV 16
+#define SDMA_OP_GCR_REQ 17
+#define SDMA_OP_DUMMY_TRAP 32
+#define SDMA_SUBOP_TIMESTAMP_SET 0
+#define SDMA_SUBOP_TIMESTAMP_GET 1
+#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2
+#define SDMA_SUBOP_COPY_LINEAR 0
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4
+#define SDMA_SUBOP_COPY_TILED 1
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6
+#define SDMA_SUBOP_COPY_SOA 3
+#define SDMA_SUBOP_COPY_DIRTY_PAGE 7
+#define SDMA_SUBOP_COPY_LINEAR_PHY 8
+#define SDMA_SUBOP_COPY_LINEAR_BC 16
+#define SDMA_SUBOP_COPY_TILED_BC 17
+#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20
+#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21
+#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22
+#define SDMA_SUBOP_WRITE_LINEAR 0
+#define SDMA_SUBOP_WRITE_TILED 1
+#define SDMA_SUBOP_WRITE_TILED_BC 17
+#define SDMA_SUBOP_PTEPDE_GEN 0
+#define SDMA_SUBOP_PTEPDE_COPY 1
+#define SDMA_SUBOP_PTEPDE_RMW 2
+#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3
+#define SDMA_SUBOP_DATA_FILL_MULTI 1
+#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1
+#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2
+#define SDMA_SUBOP_POLL_MEM_VERIFY 3
+#define HEADER_AGENT_DISPATCH 4
+#define HEADER_BARRIER 5
+#define SDMA_OP_AQL_COPY 0
+#define SDMA_OP_AQL_BARRIER_OR 0
+
+/*define for op field*/
+#define SDMA_PKT_HEADER_op_offset 0
+#define SDMA_PKT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_op_shift 0
+#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_HEADER_sub_op_offset 0
+#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_HEADER_sub_op_shift 8
+#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift)
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift)
+
+/*define for backwards field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25
+#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift)
+
+/*define for all field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift)
+
+/*define for dst_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift)
+
+/*define for src_mtype field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift)
+
+/*define for src_l2_policy field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for dst_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift)
+
+/*define for dst_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift)
+
+/*define for dst_log field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift)
+
+/*define for dst_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift)
+
+/*define for dst_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for src_gcc field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift)
+
+/*define for src_sys field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift)
+
+/*define for src_snoop field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift)
+
+/*define for src_gpa field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift)
+
+/*define for dst1_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST1_ADDR_LO word*/
+/*define for dst1_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift)
+
+/*define for DST1_ADDR_HI word*/
+/*define for dst1_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift)
+
+/*define for DST2_ADDR_LO word*/
+/*define for dst2_addr_31_0 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift)
+
+/*define for DST2_ADDR_HI word*/
+/*define for dst2_addr_63_32 field*/
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0
+#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for elementsize field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift)
+
+/*define for src_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift)
+
+/*define for DW_5 word*/
+/*define for src_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_8 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift)
+
+/*define for DW_9 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift)
+
+/*define for dst_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift)
+
+/*define for DW_10 word*/
+/*define for dst_slice_pitch field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift)
+
+/*define for DW_12 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift)
+
+/*define for dst_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 22
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift)
+
+/*define for src_ha field*/
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 30
+#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift)
+
+/*define for linear_cc field*/
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift 20
+#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CC(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12
+#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0
+#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 11
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift)
+
+/*define for videocopy field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift)
+
+/*define for broadcast field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27
+#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift)
+
+/*define for TILED_ADDR_LO_0 word*/
+/*define for tiled_addr0_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift)
+
+/*define for TILED_ADDR_HI_0 word*/
+/*define for tiled_addr0_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift)
+
+/*define for TILED_ADDR_LO_1 word*/
+/*define for tiled_addr1_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift)
+
+/*define for TILED_ADDR_HI_1 word*/
+/*define for tiled_addr1_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift)
+
+/*define for DW_5 word*/
+/*define for width field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift)
+
+/*define for DW_6 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift)
+
+/*define for DW_7 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift)
+
+/*define for DW_8 word*/
+/*define for x field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift)
+
+/*define for DW_9 word*/
+/*define for z field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift)
+
+/*define for DW_10 word*/
+/*define for dst2_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24
+#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for LINEAR_PITCH word*/
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift)
+
+/*define for LINEAR_SLICE_PITCH word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0
+#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift)
+
+/*define for dcc_dir field*/
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31
+#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift)
+
+/*define for src_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift)
+
+/*define for src_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift)
+
+/*define for src_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift)
+
+/*define for src_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift)
+
+/*define for dst_swizzle_mode field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift)
+
+/*define for dst_dimension field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9
+#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift)
+
+/*define for dst_mip_max field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift)
+
+/*define for dst_mip_id field*/
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20
+#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_T2T_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for src_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift)
+
+/*define for src_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift)
+
+/*define for DW_4 word*/
+/*define for src_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift)
+
+/*define for src_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift)
+
+/*define for DW_5 word*/
+/*define for src_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift)
+
+/*define for src_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift)
+
+/*define for DW_6 word*/
+/*define for src_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift)
+
+/*define for src_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift)
+
+/*define for src_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift)
+
+/*define for src_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift)
+
+/*define for src_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift)
+
+/*define for src_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift)
+
+/*define for src_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift)
+
+/*define for src_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift)
+
+/*define for src_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for dst_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift)
+
+/*define for dst_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift)
+
+/*define for DW_10 word*/
+/*define for dst_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift)
+
+/*define for dst_width field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift)
+
+/*define for DW_11 word*/
+/*define for dst_height field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift)
+
+/*define for dst_depth field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift)
+
+/*define for DW_12 word*/
+/*define for dst_element_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift)
+
+/*define for dst_array_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift)
+
+/*define for dst_mit_mode field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift)
+
+/*define for dst_tilesplit_size field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift)
+
+/*define for dst_bank_w field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift)
+
+/*define for dst_bank_h field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift)
+
+/*define for dst_num_bank field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift)
+
+/*define for dst_mat_aspt field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift)
+
+/*define for dst_pipe_config field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26
+#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift)
+
+/*define for DW_13 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift)
+
+/*define for DW_14 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0
+#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift)
+
+/*define for dst_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16
+#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003
+#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24
+#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift)
+
+/*define for dcc field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift)
+
+/*define for mip_id field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift)
+
+/*define for META_ADDR_LO word*/
+/*define for meta_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift)
+
+/*define for META_ADDR_HI word*/
+/*define for meta_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift)
+
+/*define for META_CONFIG word*/
+/*define for data_format field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift)
+
+/*define for color_transform_disable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift)
+
+/*define for alpha_is_on_msb field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift)
+
+/*define for number_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift)
+
+/*define for surface_type field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift)
+
+/*define for max_comp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift)
+
+/*define for max_uncomp_block_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift)
+
+/*define for write_compress_enable field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift)
+
+/*define for meta_tmz field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29
+#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift)
+
+/*define for TILED_ADDR_LO word*/
+/*define for tiled_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift)
+
+/*define for TILED_ADDR_HI word*/
+/*define for tiled_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for tiled_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift)
+
+/*define for tiled_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift)
+
+/*define for DW_4 word*/
+/*define for tiled_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift)
+
+/*define for width field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift)
+
+/*define for DW_5 word*/
+/*define for height field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift)
+
+/*define for DW_6 word*/
+/*define for element_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) ((x & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+/*define for DW_9 word*/
+/*define for linear_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift)
+
+/*define for linear_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift)
+
+/*define for DW_10 word*/
+/*define for linear_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift)
+
+/*define for linear_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift)
+
+/*define for DW_11 word*/
+/*define for linear_slice_pitch field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift)
+
+/*define for DW_12 word*/
+/*define for rect_x field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift)
+
+/*define for rect_y field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift)
+
+/*define for DW_13 word*/
+/*define for rect_z field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift)
+
+/*define for tile_sw field*/
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24
+#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COPY_STRUCT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8
+#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18
+#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift)
+
+/*define for detile field*/
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001
+#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31
+#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift)
+
+/*define for SB_ADDR_LO word*/
+/*define for sb_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift)
+
+/*define for SB_ADDR_HI word*/
+/*define for sb_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift)
+
+/*define for START_INDEX word*/
+/*define for start_index field*/
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0
+#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0
+#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift)
+
+/*define for DW_5 word*/
+/*define for stride field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF
+#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift)
+
+/*define for linear_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16
+#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift)
+
+/*define for struct_sw field*/
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003
+#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24
+#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift)
+
+/*define for LINEAR_ADDR_LO word*/
+/*define for linear_addr_31_0 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift)
+
+/*define for LINEAR_ADDR_HI word*/
+/*define for linear_addr_63_32 field*/
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0
+#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_UNTILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24
+#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift)
+
+/*define for encrypt field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16
+#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18
+#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift)
+
+/*define for swizzle_mode field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift)
+
+/*define for dimension field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9
+#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift)
+
+/*define for mip_max field*/
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF
+#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_TILED_BC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DW_3 word*/
+/*define for width field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift)
+
+/*define for DW_4 word*/
+/*define for height field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift)
+
+/*define for depth field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift)
+
+/*define for DW_5 word*/
+/*define for element_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift)
+
+/*define for array_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift)
+
+/*define for mit_mode field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift)
+
+/*define for tilesplit_size field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift)
+
+/*define for bank_w field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift)
+
+/*define for bank_h field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift)
+
+/*define for num_bank field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift)
+
+/*define for mat_aspt field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift)
+
+/*define for pipe_config field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26
+#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift)
+
+/*define for DW_6 word*/
+/*define for x field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift)
+
+/*define for y field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16
+#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift)
+
+/*define for DW_7 word*/
+/*define for z field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24
+#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2
+#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0
+#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18
+#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift)
+
+/*define for pte_size field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift)
+
+/*define for direction field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift)
+
+/*define for ptepde_op field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_BIT_FOR_DW word*/
+/*define for mask_first_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift)
+
+/*define for mask_last_xfer field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift)
+
+/*define for COUNT_IN_32B_XFER word*/
+/*define for count field*/
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0
+#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PTEPDE_RMW packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16
+#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22
+#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23
+#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24
+#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift)
+
+/*define for MASK_LO word*/
+/*define for mask_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift)
+
+/*define for MASK_HI word*/
+/*define for mask_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift)
+
+/*define for VALUE_LO word*/
+/*define for value_31_0 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift)
+
+/*define for VALUE_HI word*/
+/*define for value_63_32 field*/
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0
+#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_WRITE_INCR packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0
+#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8
+#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for MASK_DW0 word*/
+/*define for mask_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift)
+
+/*define for MASK_DW1 word*/
+/*define for mask_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift)
+
+/*define for INIT_DW0 word*/
+/*define for init_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift)
+
+/*define for INIT_DW1 word*/
+/*define for init_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift)
+
+/*define for INCR_DW0 word*/
+/*define for incr_dw0 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift)
+
+/*define for INCR_DW1 word*/
+/*define for incr_dw1 field*/
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0
+#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9
+#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF
+#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0
+#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_INDIRECT packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_INDIRECT_HEADER_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_op_shift 0
+#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8
+#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F
+#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16
+#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift)
+
+/*define for priv field*/
+#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0
+#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001
+#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31
+#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift)
+
+/*define for BASE_LO word*/
+/*define for ib_base_31_0 field*/
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0
+#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift)
+
+/*define for BASE_HI word*/
+/*define for ib_base_63_32 field*/
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0
+#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift)
+
+/*define for IB_SIZE word*/
+/*define for ib_size field*/
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF
+#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0
+#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift)
+
+/*define for CSA_ADDR_LO word*/
+/*define for csa_addr_31_0 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift)
+
+/*define for CSA_ADDR_HI word*/
+/*define for csa_addr_63_32 field*/
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0
+#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SEMAPHORE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0
+#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift)
+
+/*define for write_one field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29
+#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift)
+
+/*define for signal field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30
+#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift)
+
+/*define for mailbox field*/
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001
+#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31
+#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_FENCE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_FENCE_HEADER_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_op_shift 0
+#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0
+#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8
+#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift)
+
+/*define for mtype field*/
+#define SDMA_PKT_FENCE_HEADER_mtype_offset 0
+#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007
+#define SDMA_PKT_FENCE_HEADER_mtype_shift 16
+#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift)
+
+/*define for gcc field*/
+#define SDMA_PKT_FENCE_HEADER_gcc_offset 0
+#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gcc_shift 19
+#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift)
+
+/*define for sys field*/
+#define SDMA_PKT_FENCE_HEADER_sys_offset 0
+#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_sys_shift 20
+#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift)
+
+/*define for snp field*/
+#define SDMA_PKT_FENCE_HEADER_snp_offset 0
+#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_snp_shift 22
+#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift)
+
+/*define for gpa field*/
+#define SDMA_PKT_FENCE_HEADER_gpa_offset 0
+#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001
+#define SDMA_PKT_FENCE_HEADER_gpa_shift 23
+#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift)
+
+/*define for l2_policy field*/
+#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0
+#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003
+#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24
+#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_FENCE_DATA_data_offset 3
+#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_FENCE_DATA_data_shift 0
+#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_SRBM_WRITE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8
+#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift)
+
+/*define for byte_en field*/
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F
+#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28
+#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift)
+
+/*define for ADDR word*/
+/*define for addr field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0
+#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift)
+
+/*define for apertureid field*/
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF
+#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20
+#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift)
+
+/*define for DATA word*/
+/*define for data field*/
+#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2
+#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF
+#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0
+#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift)
+
+
+/*
+** Definitions for SDMA_PKT_PRE_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0
+#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift)
+
+/*define for dev_sel field*/
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF
+#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16
+#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_COND_EXE packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_COND_EXE_HEADER_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_op_shift 0
+#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8
+#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift)
+
+/*define for REFERENCE word*/
+/*define for reference field*/
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF
+#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0
+#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift)
+
+/*define for EXEC_COUNT word*/
+/*define for exec_count field*/
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0
+#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_CONSTANT_FILL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift)
+
+/*define for sw field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16
+#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift)
+
+/*define for fillsize field*/
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003
+#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30
+#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for DATA word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0
+#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF
+#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0
+#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DATA_FILL_MULTI packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift)
+
+/*define for memlog_clr field*/
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31
+#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift)
+
+/*define for BYTE_STRIDE word*/
+/*define for byte_stride field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift)
+
+/*define for DMA_COUNT word*/
+/*define for dma_count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for BYTE_COUNT word*/
+/*define for count field*/
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0
+#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REGMEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift)
+
+/*define for hdp_flush field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26
+#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift)
+
+/*define for func field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007
+#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28
+#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift)
+
+/*define for mem_poll field*/
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001
+#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31
+#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift)
+
+/*define for VALUE word*/
+/*define for value field*/
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0
+#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift)
+
+/*define for MASK word*/
+/*define for mask field*/
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0
+#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift)
+
+/*define for DW5 word*/
+/*define for interval field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF
+#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0
+#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift)
+
+/*define for retry_count field*/
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF
+#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16
+#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for SRC_ADDR word*/
+/*define for addr_31_2 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift)
+
+/*define for ea field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift)
+
+/*define for START_PAGE word*/
+/*define for addr_31_4 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift)
+
+/*define for PAGE_NUM word*/
+/*define for page_num_31_0 field*/
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0
+#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift)
+
+
+/*
+** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift)
+
+/*define for mode field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31
+#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift)
+
+/*define for PATTERN word*/
+/*define for pattern field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift)
+
+/*define for CMP0_ADDR_START_LO word*/
+/*define for cmp0_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift)
+
+/*define for CMP0_ADDR_START_HI word*/
+/*define for cmp0_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift)
+
+/*define for CMP0_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP0_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for CMP1_ADDR_START_LO word*/
+/*define for cmp1_start_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift)
+
+/*define for CMP1_ADDR_START_HI word*/
+/*define for cmp1_start_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift)
+
+/*define for CMP1_ADDR_END_LO word*/
+/*define for cmp1_end_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift)
+
+/*define for CMP1_ADDR_END_HI word*/
+/*define for cmp1_end_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift)
+
+/*define for REC_ADDR_LO word*/
+/*define for rec_31_0 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift)
+
+/*define for REC_ADDR_HI word*/
+/*define for rec_63_32 field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift)
+
+/*define for RESERVED word*/
+/*define for reserved field*/
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0
+#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift)
+
+
+/*
+** Definitions for SDMA_PKT_ATOMIC packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_ATOMIC_HEADER_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_ATOMIC_HEADER_op_shift 0
+#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift)
+
+/*define for loop field*/
+#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16
+#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift)
+
+/*define for tmz field*/
+#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001
+#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18
+#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift)
+
+/*define for atomic_op field*/
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F
+#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25
+#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift)
+
+/*define for ADDR_LO word*/
+/*define for addr_31_0 field*/
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift)
+
+/*define for ADDR_HI word*/
+/*define for addr_63_32 field*/
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0
+#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift)
+
+/*define for SRC_DATA_LO word*/
+/*define for src_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift)
+
+/*define for SRC_DATA_HI word*/
+/*define for src_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift)
+
+/*define for CMP_DATA_LO word*/
+/*define for cmp_data_31_0 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift)
+
+/*define for CMP_DATA_HI word*/
+/*define for cmp_data_63_32 field*/
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0
+#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift)
+
+/*define for LOOP_INTERVAL word*/
+/*define for loop_interval field*/
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0
+#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_SET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift)
+
+/*define for INIT_DATA_LO word*/
+/*define for init_data_31_0 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift)
+
+/*define for INIT_DATA_HI word*/
+/*define for init_data_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift)
+
+/*define for WRITE_ADDR_LO word*/
+/*define for write_addr_31_3 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift)
+
+/*define for WRITE_ADDR_HI word*/
+/*define for write_addr_63_32 field*/
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0
+#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift)
+
+
+/*
+** Definitions for SDMA_PKT_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_DUMMY_TRAP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8
+#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift)
+
+/*define for INT_CONTEXT word*/
+/*define for int_context field*/
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0
+#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GPUVM_INV packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0
+#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8
+#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for per_vmid_inv_req field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift)
+
+/*define for flush_type field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift)
+
+/*define for l2_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift)
+
+/*define for l2_pde0 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift)
+
+/*define for l2_pde1 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift)
+
+/*define for l2_pde2 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift)
+
+/*define for l1_ptes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift)
+
+/*define for clr_protection_fault_status_addr field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift)
+
+/*define for log_request field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift)
+
+/*define for four_kilobytes field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26
+#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for s field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift)
+
+/*define for page_va_42_12 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1
+#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for page_va_47_43 field*/
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0
+#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift)
+
+
+/*
+** Definitions for SDMA_PKT_GCR_REQ packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0
+#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8
+#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift)
+
+/*define for PAYLOAD1 word*/
+/*define for base_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift)
+
+/*define for PAYLOAD2 word*/
+/*define for base_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift)
+
+/*define for gcr_control_15_0 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16
+#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift)
+
+/*define for PAYLOAD3 word*/
+/*define for gcr_control_18_16 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift)
+
+/*define for limit_va_31_7 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7
+#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift)
+
+/*define for PAYLOAD4 word*/
+/*define for limit_va_47_32 field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift)
+
+/*define for vmid field*/
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24
+#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift)
+
+
+/*
+** Definitions for SDMA_PKT_NOP packet
+*/
+
+/*define for HEADER word*/
+/*define for op field*/
+#define SDMA_PKT_NOP_HEADER_op_offset 0
+#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_op_shift 0
+#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift)
+
+/*define for sub_op field*/
+#define SDMA_PKT_NOP_HEADER_sub_op_offset 0
+#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF
+#define SDMA_PKT_NOP_HEADER_sub_op_shift 8
+#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
+
+/*define for DATA0 word*/
+/*define for data0 field*/
+#define SDMA_PKT_NOP_DATA0_data0_offset 1
+#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF
+#define SDMA_PKT_NOP_DATA0_data0_shift 0
+#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_HEADER packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0
+#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16
+#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for RETURN_ADDR_LO word*/
+/*define for return_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift)
+
+/*define for RETURN_ADDR_HI word*/
+/*define for return_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift)
+
+/*define for COUNT word*/
+/*define for count field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift)
+
+/*define for PARAMETER word*/
+/*define for dst_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift)
+
+/*define for src_sw field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24
+#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift)
+
+/*define for SRC_ADDR_LO word*/
+/*define for src_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift)
+
+/*define for SRC_ADDR_HI word*/
+/*define for src_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift)
+
+/*define for DST_ADDR_LO word*/
+/*define for dst_addr_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift)
+
+/*define for DST_ADDR_HI word*/
+/*define for dst_addr_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift)
+
+/*define for RESERVED_DW10 word*/
+/*define for reserved_dw10 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift)
+
+/*define for RESERVED_DW11 word*/
+/*define for reserved_dw11 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+/*
+** Definitions for SDMA_AQL_PKT_BARRIER_OR packet
+*/
+
+/*define for HEADER word*/
+/*define for format field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift)
+
+/*define for barrier field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift)
+
+/*define for acquire_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift)
+
+/*define for release_fence_scope field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift)
+
+/*define for reserved field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift)
+
+/*define for op field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift)
+
+/*define for subop field*/
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20
+#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift)
+
+/*define for RESERVED_DW1 word*/
+/*define for reserved_dw1 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift)
+
+/*define for DEPENDENT_ADDR_0_LO word*/
+/*define for dependent_addr_0_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift)
+
+/*define for DEPENDENT_ADDR_0_HI word*/
+/*define for dependent_addr_0_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift)
+
+/*define for DEPENDENT_ADDR_1_LO word*/
+/*define for dependent_addr_1_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift)
+
+/*define for DEPENDENT_ADDR_1_HI word*/
+/*define for dependent_addr_1_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift)
+
+/*define for DEPENDENT_ADDR_2_LO word*/
+/*define for dependent_addr_2_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift)
+
+/*define for DEPENDENT_ADDR_2_HI word*/
+/*define for dependent_addr_2_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift)
+
+/*define for DEPENDENT_ADDR_3_LO word*/
+/*define for dependent_addr_3_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift)
+
+/*define for DEPENDENT_ADDR_3_HI word*/
+/*define for dependent_addr_3_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift)
+
+/*define for DEPENDENT_ADDR_4_LO word*/
+/*define for dependent_addr_4_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift)
+
+/*define for DEPENDENT_ADDR_4_HI word*/
+/*define for dependent_addr_4_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift)
+
+/*define for RESERVED_DW12 word*/
+/*define for reserved_dw12 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift)
+
+/*define for RESERVED_DW13 word*/
+/*define for reserved_dw13 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift)
+
+/*define for COMPLETION_SIGNAL_LO word*/
+/*define for completion_signal_31_0 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift)
+
+/*define for COMPLETION_SIGNAL_HI word*/
+/*define for completion_signal_63_32 field*/
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
+#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
+
+
+#endif /* __NAVI10_SDMA_PKT_OPEN_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
new file mode 100644
index 000000000000..835d7b1a841f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v2_3.h"
+
+#include "nbio/nbio_2_3_default.h"
+#include "nbio/nbio_2_3_offset.h"
+#include "nbio/nbio_2_3_sh_mask.h"
+
+#define smnPCIE_CONFIG_CNTL 0x11180044
+#define smnCPM_CONTROL 0x11180460
+#define smnPCIE_CNTL2 0x11180070
+
+static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v2_3_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK |
+ BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+}
+
+static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index,
+ int doorbell_size)
+{
+ u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, SIZE,
+ doorbell_size);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_SDMA0_DOORBELL_RANGE, SIZE,
+ 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v2_3_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN,
+ enable ? 1 : 0);
+}
+
+static void nbio_v2_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 tmp = 0;
+
+ if (enable) {
+ tmp = REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_EN, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_MODE, 1) |
+ REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ DOORBELL_SELFRING_GPA_APER_SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_LOW,
+ lower_32_bits(adev->doorbell.base));
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_HIGH,
+ upper_32_bits(adev->doorbell.base));
+ }
+
+ WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL,
+ tmp);
+}
+
+
+static void nbio_v2_3_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, SIZE,
+ 2);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range,
+ BIF_IH_DOORBELL_RANGE, SIZE,
+ 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+static void nbio_v2_3_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /*
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL,
+ IH_DUMMY_RD_OVERRIDE, 0);
+
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL,
+ IH_REQ_NONSNOOP_EN, 0);
+
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnCPM_CONTROL);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnCPM_CONTROL, data);
+}
+
+static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static u32 nbio_v2_3_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v2_3_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v2_3_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v2_3_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
+ .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v2_3_detect_hw_virt(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ reg = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_IOV_FUNC_IDENTIFIER);
+ if (reg & 1)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+ if (reg & 0x80000000)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+ if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+}
+
+static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1);
+ data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1);
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+}
+
+const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
+ .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg,
+ .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v2_3_get_pcie_data_offset,
+ .get_rev_id = nbio_v2_3_get_rev_id,
+ .mc_access_enable = nbio_v2_3_mc_access_enable,
+ .hdp_flush = nbio_v2_3_hdp_flush,
+ .get_memsize = nbio_v2_3_get_memsize,
+ .sdma_doorbell_range = nbio_v2_3_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v2_3_vcn_doorbell_range,
+ .enable_doorbell_aperture = nbio_v2_3_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v2_3_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v2_3_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v2_3_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v2_3_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v2_3_get_clockgating_state,
+ .ih_control = nbio_v2_3_ih_control,
+ .init_registers = nbio_v2_3_init_registers,
+ .detect_hw_virt = nbio_v2_3_detect_hw_virt,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
new file mode 100644
index 000000000000..5ae52085f6b7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V2_3_H__
+#define __NBIO_V2_3_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1cdb98ad2db3..73419fa38159 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -29,9 +29,18 @@
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
+static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
.ih_control = nbio_v7_0_ih_control,
.init_registers = nbio_v7_0_init_registers,
.detect_hw_virt = nbio_v7_0_detect_hw_virt,
+ .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c69d51598cfe..bfaaa327ae3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -27,9 +27,18 @@
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "nbio/nbio_7_4_0_smn.h"
+#include <uapi/linux/kfd_ioctl.h>
#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+ WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (!ring || !ring->funcs->emit_wreg)
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
else
- amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
- NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
}
static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
.ih_control = nbio_v7_4_ih_control,
.init_registers = nbio_v7_4_init_registers,
.detect_hw_virt = nbio_v7_4_detect_hw_virt,
+ .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
new file mode 100644
index 000000000000..ad430cbcd72f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -0,0 +1,823 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "hdp/hdp_5_0_0_offset.h"
+#include "hdp/hdp_5_0_0_sh_mask.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+#include "gmc_v10_0.h"
+#include "gfxhub_v2_0.h"
+#include "mmhub_v2_0.h"
+#include "nv.h"
+#include "navi10_ih.h"
+#include "gfx_v10_0.h"
+#include "sdma_v5_0.h"
+#include "vcn_v2_0.h"
+#include "dce_virtual.h"
+#include "mes_v10_1.h"
+
+static const struct amd_ip_funcs nv_common_ip_funcs;
+
+/*
+ * Indirect registers accessor
+ */
+static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+ address = adev->nbio_funcs->get_pcie_index_offset(adev);
+ data = adev->nbio_funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg);
+ (void)RREG32(address);
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+ return r;
+}
+
+static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = adev->nbio_funcs->get_pcie_index_offset(adev);
+ data = adev->nbio_funcs->get_pcie_data_offset(adev);
+
+ spin_lock_irqsave(&adev->pcie_idx_lock, flags);
+ WREG32(address, reg);
+ (void)RREG32(address);
+ WREG32(data, v);
+ (void)RREG32(data);
+ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+}
+
+static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 nv_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio_funcs->get_memsize(adev);
+}
+
+static u32 nv_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+void nv_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
+}
+
+static void nv_vga_set_state(struct amdgpu_device *adev, bool state)
+{
+ /* todo */
+}
+
+static bool nv_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static bool nv_read_bios_from_rom(struct amdgpu_device *adev,
+ u8 *bios, u32 length_bytes)
+{
+ /* TODO: will implement it when SMU header is available */
+ return false;
+}
+
+static struct soc15_allowed_register_entry nv_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)},
+#if 0 /* TODO: will set it when SDMA header is available */
+ { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)},
+#endif
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+};
+
+static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t nv_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return nv_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int nv_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {
+ en = &nv_allowed_read_registers[i];
+ if (reg_offset !=
+ (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))
+ continue;
+
+ *value = nv_get_register_value(adev,
+ nv_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+#if 0
+static void nv_gpu_pci_config_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+
+ dev_info(adev->dev, "GPU pci config reset\n");
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+ /* reset */
+ amdgpu_pci_config_reset(adev);
+
+ udelay(100);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = nbio_v2_3_get_memsize(adev);
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+}
+#endif
+
+static int nv_asic_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ dev_info(adev->dev, "GPU mode1 reset\n");
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ pci_save_state(adev->pdev);
+
+ ret = psp_gpu_reset(adev);
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+
+ pci_restore_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio_funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+static int nv_asic_reset(struct amdgpu_device *adev)
+{
+
+ /* FIXME: it doesn't work since vega10 */
+#if 0
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ nv_gpu_pci_config_reset(adev);
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+#endif
+ int ret = 0;
+ struct smu_context *smu = &adev->smu;
+
+ if (smu_baco_is_support(smu))
+ ret = smu_baco_reset(smu);
+ else
+ ret = nv_asic_mode1_reset(adev);
+
+ return ret;
+}
+
+static int nv_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
+{
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ if (amdgpu_pcie_gen2 == 0)
+ return;
+
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
+ return;
+
+ /* todo */
+}
+
+static void nv_program_aspm(struct amdgpu_device *adev)
+{
+
+ if (amdgpu_aspm == 0)
+ return;
+
+ /* todo */
+}
+
+static void nv_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ adev->nbio_funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable);
+}
+
+static const struct amdgpu_ip_block_version nv_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &nv_common_ip_funcs,
+};
+
+int nv_set_ip_blocks(struct amdgpu_device *adev)
+{
+ /* Set IP register base before any HW register access */
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ navi10_reg_base_init(adev);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ adev->nbio_funcs = &nbio_v2_3_funcs;
+
+ adev->nbio_funcs->detect_hw_virt(adev);
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ amdgpu_device_ip_block_add(adev, &nv_common_ip_block);
+ amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
+ is_support_sw_smu(adev))
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
+#if defined(CONFIG_DRM_AMD_DC)
+ else if (amdgpu_device_has_dc_support(adev))
+ amdgpu_device_ip_block_add(adev, &dm_ip_block);
+#else
+# warning "Enable CONFIG_DRM_AMD_DC for display support on navi."
+#endif
+ amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block);
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
+ is_support_sw_smu(adev))
+ amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ if (adev->enable_mes)
+ amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static uint32_t nv_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio_funcs->get_rev_id(adev);
+}
+
+static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ adev->nbio_funcs->hdp_flush(adev, ring);
+}
+
+static void nv_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+ }
+}
+
+static bool nv_need_full_reset(struct amdgpu_device *adev)
+{
+ return true;
+}
+
+static void nv_get_pcie_usage(struct amdgpu_device *adev,
+ uint64_t *count0,
+ uint64_t *count1)
+{
+ /*TODO*/
+}
+
+static bool nv_need_reset_on_init(struct amdgpu_device *adev)
+{
+#if 0
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+#endif
+ /* TODO: re-enable it when mode1 reset is functional */
+ return false;
+}
+
+static void nv_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static const struct amdgpu_asic_funcs nv_asic_funcs =
+{
+ .read_disabled_bios = &nv_read_disabled_bios,
+ .read_bios_from_rom = &nv_read_bios_from_rom,
+ .read_register = &nv_read_register,
+ .reset = &nv_asic_reset,
+ .set_vga_state = &nv_vga_set_state,
+ .get_xclk = &nv_get_xclk,
+ .set_uvd_clocks = &nv_set_uvd_clocks,
+ .set_vce_clocks = &nv_set_vce_clocks,
+ .get_config_memsize = &nv_get_config_memsize,
+ .flush_hdp = &nv_flush_hdp,
+ .invalidate_hdp = &nv_invalidate_hdp,
+ .init_doorbell_index = &nv_init_doorbell_index,
+ .need_full_reset = &nv_need_full_reset,
+ .get_pcie_usage = &nv_get_pcie_usage,
+ .need_reset_on_init = &nv_need_reset_on_init,
+};
+
+static int nv_common_early_init(void *handle)
+{
+ bool psp_enabled = false;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &nv_pcie_rreg;
+ adev->pcie_wreg = &nv_pcie_wreg;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &nv_didt_rreg;
+ adev->didt_wreg = &nv_didt_wreg;
+
+ adev->asic_funcs = &nv_asic_funcs;
+
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
+ (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
+ psp_enabled = true;
+
+ adev->rev_id = nv_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS;
+ adev->pg_flags = AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG |
+ AMD_PG_SUPPORT_MMHUB |
+ AMD_PG_SUPPORT_ATHUB;
+ adev->external_rev_id = adev->rev_id + 0x1;
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nv_common_late_init(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_sw_init(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* enable pcie gen2/3 link */
+ nv_pcie_gen3_enable(adev);
+ /* enable aspm */
+ nv_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio_funcs->init_registers(adev);
+ /* enable the doorbell aperture */
+ nv_enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int nv_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* disable the doorbell aperture */
+ nv_enable_doorbell_aperture(adev, false);
+
+ return 0;
+}
+
+static int nv_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return nv_common_hw_fini(adev);
+}
+
+static int nv_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return nv_common_hw_init(adev);
+}
+
+static bool nv_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int nv_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int nv_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl, hdp_clk_cntl1;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL);
+
+ /* Before doing clock/power mode switch,
+ * forced on IPH & RC clock */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ IPH_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* HDP 5.0 doesn't support dynamic power mode switch,
+ * disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_LS_EN, enable);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, enable);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_DS_EN, enable);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, enable);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ IPH_MEM_POWER_SD_EN, enable);
+ /* RC should not use shut down mode, fallback to ds */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, enable);
+ }
+
+ WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* restore IPH & RC clock override after clock/power mode changing */
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1);
+}
+
+static void nv_update_hdp_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static int nv_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ adev->nbio_funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ adev->nbio_funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ nv_update_hdp_mem_power_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ nv_update_hdp_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int nv_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* TODO */
+ return 0;
+}
+
+static void nv_common_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t tmp;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ adev->nbio_funcs->get_clockgating_state(adev, flags);
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+
+ return;
+}
+
+static const struct amd_ip_funcs nv_common_ip_funcs = {
+ .name = "nv_common",
+ .early_init = nv_common_early_init,
+ .late_init = nv_common_late_init,
+ .sw_init = nv_common_sw_init,
+ .sw_fini = nv_common_sw_fini,
+ .hw_init = nv_common_hw_init,
+ .hw_fini = nv_common_hw_fini,
+ .suspend = nv_common_suspend,
+ .resume = nv_common_resume,
+ .is_idle = nv_common_is_idle,
+ .wait_for_idle = nv_common_wait_for_idle,
+ .soft_reset = nv_common_soft_reset,
+ .set_clockgating_state = nv_common_set_clockgating_state,
+ .set_powergating_state = nv_common_set_powergating_state,
+ .get_clockgating_state = nv_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
new file mode 100644
index 000000000000..639c54933cc5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV_H__
+#define __NV_H__
+
+#include "nbio_v2_3.h"
+
+void nv_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+int nv_set_ip_blocks(struct amdgpu_device *adev);
+int navi10_reg_base_init(struct amdgpu_device *adev);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
new file mode 100644
index 000000000000..1de984647dbb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef NVD_H
+#define NVD_H
+
+/**
+ * Navi's PM4 definitions
+ */
+#define PACKET_TYPE0 0
+#define PACKET_TYPE1 1
+#define PACKET_TYPE2 2
+#define PACKET_TYPE3 3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \
+ ((reg) & 0xFFFF) | \
+ ((n) & 0x3FFF) << 16)
+#define CP_PACKET2 0x80000000
+#define PACKET2_PAD_SHIFT 0
+#define PACKET2_PAD_MASK (0x3fffffff << 0)
+
+#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \
+ (((op) & 0xFF) << 8) | \
+ ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define PACKET3_NOP 0x10
+#define PACKET3_SET_BASE 0x11
+#define PACKET3_BASE_INDEX(x) ((x) << 0)
+#define CE_PARTITION_BASE 3
+#define PACKET3_CLEAR_STATE 0x12
+#define PACKET3_INDEX_BUFFER_SIZE 0x13
+#define PACKET3_DISPATCH_DIRECT 0x15
+#define PACKET3_DISPATCH_INDIRECT 0x16
+#define PACKET3_INDIRECT_BUFFER_END 0x17
+#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
+#define PACKET3_ATOMIC_GDS 0x1D
+#define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_OCCLUSION_QUERY 0x1F
+#define PACKET3_SET_PREDICATION 0x20
+#define PACKET3_REG_RMW 0x21
+#define PACKET3_COND_EXEC 0x22
+#define PACKET3_PRED_EXEC 0x23
+#define PACKET3_DRAW_INDIRECT 0x24
+#define PACKET3_DRAW_INDEX_INDIRECT 0x25
+#define PACKET3_INDEX_BASE 0x26
+#define PACKET3_DRAW_INDEX_2 0x27
+#define PACKET3_CONTEXT_CONTROL 0x28
+#define PACKET3_INDEX_TYPE 0x2A
+#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
+#define PACKET3_DRAW_INDEX_AUTO 0x2D
+#define PACKET3_NUM_INSTANCES 0x2F
+#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PACKET3_INDIRECT_BUFFER_PRIV 0x32
+#define PACKET3_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33
+#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
+#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
+#define PACKET3_DRAW_PREAMBLE 0x36
+#define PACKET3_WRITE_DATA 0x37
+#define WRITE_DATA_DST_SEL(x) ((x) << 8)
+ /* 0 - register
+ * 1 - memory (sync - via GRBM)
+ * 2 - gl2
+ * 3 - gds
+ * 4 - reserved
+ * 5 - memory (async - direct)
+ */
+#define WR_ONE_ADDR (1 << 16)
+#define WR_CONFIRM (1 << 20)
+#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
+ /* 0 - me
+ * 1 - pfp
+ * 2 - ce
+ */
+#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PACKET3_MEM_SEMAPHORE 0x39
+# define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
+# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */
+# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
+# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
+#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A
+#define PACKET3_COPY_DW 0x3B
+#define PACKET3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
+ /* 0 - always
+ * 1 - <
+ * 2 - <=
+ * 3 - ==
+ * 4 - !=
+ * 5 - >=
+ * 6 - >
+ */
+#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
+ /* 0 - reg
+ * 1 - mem
+ */
+#define WAIT_REG_MEM_OPERATION(x) ((x) << 6)
+ /* 0 - wait_reg_mem
+ * 1 - wr_wait_wr_reg
+ */
+#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
+ /* 0 - me
+ * 1 - pfp
+ */
+#define PACKET3_INDIRECT_BUFFER 0x3F
+#define INDIRECT_BUFFER_VALID (1 << 23)
+#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
+ /* 0 - LRU
+ * 1 - Stream
+ * 2 - Bypass
+ */
+#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
+#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_COND_INDIRECT_BUFFER 0x3F
+#define PACKET3_COPY_DATA 0x40
+#define PACKET3_CP_DMA 0x41
+#define PACKET3_PFP_SYNC_ME 0x42
+#define PACKET3_SURFACE_SYNC 0x43
+#define PACKET3_ME_INITIALIZE 0x44
+#define PACKET3_COND_WRITE 0x45
+#define PACKET3_EVENT_WRITE 0x46
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+ /* 0 - any non-TS event
+ * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ */
+#define PACKET3_EVENT_WRITE_EOP 0x47
+#define PACKET3_EVENT_WRITE_EOS 0x48
+#define PACKET3_RELEASE_MEM 0x49
+#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0)
+#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8)
+#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12)
+#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13)
+#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14)
+#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15)
+#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16)
+#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17)
+#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19)
+#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20)
+#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21)
+#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22)
+#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - cache_policy__me_release_mem__lru
+ * 1 - cache_policy__me_release_mem__stream
+ * 2 - cache_policy__me_release_mem__noa
+ * 3 - cache_policy__me_release_mem__bypass
+ */
+#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28)
+
+#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29)
+ /* 0 - discard
+ * 1 - send low 32bit data
+ * 2 - send 64bit data
+ * 3 - send 64bit GPU counter value
+ * 4 - send 64bit sys counter value
+ */
+#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24)
+ /* 0 - none
+ * 1 - interrupt only (DATA_SEL = 0)
+ * 2 - interrupt when data write is confirmed
+ */
+#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16)
+ /* 0 - MC
+ * 1 - TC/L2
+ */
+
+
+
+#define PACKET3_PREAMBLE_CNTL 0x4A
+# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
+# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
+#define PACKET3_DMA_DATA 0x50
+/* 1. header
+ * 2. CONTROL
+ * 3. SRC_ADDR_LO or DATA [31:0]
+ * 4. SRC_ADDR_HI [31:0]
+ * 5. DST_ADDR_LO [31:0]
+ * 6. DST_ADDR_HI [7:0]
+ * 7. COMMAND [31:26] | BYTE_COUNT [25:0]
+ */
+/* CONTROL */
+# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0)
+ /* 0 - ME
+ * 1 - PFP
+ */
+# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20)
+ /* 0 - DST_ADDR using DAS
+ * 1 - GDS
+ * 3 - DST_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
+ /* 0 - LRU
+ * 1 - Stream
+ */
+# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29)
+ /* 0 - SRC_ADDR using SAS
+ * 1 - GDS
+ * 2 - DATA
+ * 3 - SRC_ADDR using L2
+ */
+# define PACKET3_DMA_DATA_CP_SYNC (1 << 31)
+/* COMMAND */
+# define PACKET3_DMA_DATA_CMD_SAS (1 << 26)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_DAS (1 << 27)
+ /* 0 - memory
+ * 1 - register
+ */
+# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28)
+# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29)
+# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30)
+#define PACKET3_CONTEXT_REG_RMW 0x51
+#define PACKET3_GFX_CNTX_UPDATE 0x52
+#define PACKET3_BLK_CNTX_UPDATE 0x53
+#define PACKET3_INCR_UPDT_STATE 0x55
+#define PACKET3_ACQUIRE_MEM 0x58
+#define PACKET3_REWIND 0x59
+#define PACKET3_INTERRUPT 0x5A
+#define PACKET3_GEN_PDEPTE 0x5B
+#define PACKET3_INDIRECT_BUFFER_PASID 0x5C
+#define PACKET3_PRIME_UTCL2 0x5D
+#define PACKET3_LOAD_UCONFIG_REG 0x5E
+#define PACKET3_LOAD_SH_REG 0x5F
+#define PACKET3_LOAD_CONFIG_REG 0x60
+#define PACKET3_LOAD_CONTEXT_REG 0x61
+#define PACKET3_LOAD_COMPUTE_STATE 0x62
+#define PACKET3_LOAD_SH_REG_INDEX 0x63
+#define PACKET3_SET_CONFIG_REG 0x68
+#define PACKET3_SET_CONFIG_REG_START 0x00002000
+#define PACKET3_SET_CONFIG_REG_END 0x00002c00
+#define PACKET3_SET_CONTEXT_REG 0x69
+#define PACKET3_SET_CONTEXT_REG_START 0x0000a000
+#define PACKET3_SET_CONTEXT_REG_END 0x0000a400
+#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A
+#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71
+#define PACKET3_SET_SH_REG_DI 0x72
+#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
+#define PACKET3_SET_SH_REG_DI_MULTI 0x74
+#define PACKET3_GFX_PIPE_LOCK 0x75
+#define PACKET3_SET_SH_REG 0x76
+#define PACKET3_SET_SH_REG_START 0x00002c00
+#define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG_OFFSET 0x77
+#define PACKET3_SET_QUEUE_REG 0x78
+#define PACKET3_SET_UCONFIG_REG 0x79
+#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
+#define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
+#define PACKET3_FORWARD_HEADER 0x7C
+#define PACKET3_SCRATCH_RAM_WRITE 0x7D
+#define PACKET3_SCRATCH_RAM_READ 0x7E
+#define PACKET3_LOAD_CONST_RAM 0x80
+#define PACKET3_WRITE_CONST_RAM 0x81
+#define PACKET3_DUMP_CONST_RAM 0x83
+#define PACKET3_INCREMENT_CE_COUNTER 0x84
+#define PACKET3_INCREMENT_DE_COUNTER 0x85
+#define PACKET3_WAIT_ON_CE_COUNTER 0x86
+#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
+#define PACKET3_SWITCH_BUFFER 0x8B
+#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C
+#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C
+#define PACKET3_DISPATCH_DRAW 0x8D
+#define PACKET3_DISPATCH_DRAW_ACE 0x8D
+#define PACKET3_GET_LOD_STATS 0x8E
+#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F
+#define PACKET3_FRAME_CONTROL 0x90
+# define FRAME_CMD(x) ((x) << 28)
+ /*
+ * x=0: tmz_begin
+ * x=1: tmz_end
+ */
+#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91
+#define PACKET3_WAIT_REG_MEM64 0x93
+#define PACKET3_COND_PREEMPT 0x94
+#define PACKET3_HDP_FLUSH 0x95
+#define PACKET3_COPY_DATA_RB 0x96
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+#define PACKET3_AQL_PACKET 0x99
+#define PACKET3_DMA_DATA_FILL_MULTI 0x9A
+#define PACKET3_SET_SH_REG_INDEX 0x9B
+#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C
+#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D
+#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E
+#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F
+#define PACKET3_SET_RESOURCES 0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0)
+# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29)
+#define PACKET3_MAP_PROCESS 0xA1
+#define PACKET3_MAP_QUEUES 0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8)
+# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13)
+# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16)
+# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18)
+# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21)
+# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24)
+# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2 */
+# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1)
+# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+#define PACKET3_UNMAP_QUEUES 0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0)
+ /* 0 - PREEMPT_QUEUES
+ * 1 - RESET_QUEUES
+ * 2 - DISABLE_PROCESS_QUEUES
+ * 3 - PREEMPT_QUEUES_NO_UNMAP
+ */
+# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4)
+# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26)
+# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29)
+/* CONTROL2a */
+# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0)
+/* CONTROL4 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define PACKET3_QUERY_STATUS 0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0)
+# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28)
+# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30)
+/* CONTROL2a */
+# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0)
+/* CONTROL2b */
+# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2)
+# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
+#define PACKET3_RUN_LIST 0xA5
+#define PACKET3_MAP_PROCESS_VM 0xA6
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 2f79765b4bdb..5080a73a95a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -80,6 +80,18 @@ struct psp_gfx_ctrl
*/
#define GFX_FLAG_RESPONSE 0x80000000
+/* Gbr IH registers ID */
+enum ih_reg_id {
+ IH_RB = 0, // IH_RB_CNTL
+ IH_RB_RNG1 = 1, // IH_RB_CNTL_RING1
+ IH_RB_RNG2 = 2, // IH_RB_CNTL_RING2
+};
+
+/* Command to setup Gibraltar IH register */
+struct psp_gfx_cmd_gbr_ih_reg {
+ uint32_t reg_value; /* Value to be set to the IH_RB_CNTL... register*/
+ enum ih_reg_id reg_id; /* ID of the register */
+};
/* TEE Gfx Command IDs for the ring buffer interface. */
enum psp_gfx_cmd_id
@@ -94,9 +106,12 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
+ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
+ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
+ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
+ GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */
};
-
/* Command to load Trusted Application binary into PSP OS. */
struct psp_gfx_cmd_load_ta
{
@@ -168,33 +183,59 @@ struct psp_gfx_cmd_setup_tmr
/* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */
-enum psp_gfx_fw_type
-{
- GFX_FW_TYPE_NONE = 0,
- GFX_FW_TYPE_CP_ME = 1,
- GFX_FW_TYPE_CP_PFP = 2,
- GFX_FW_TYPE_CP_CE = 3,
- GFX_FW_TYPE_CP_MEC = 4,
- GFX_FW_TYPE_CP_MEC_ME1 = 5,
- GFX_FW_TYPE_CP_MEC_ME2 = 6,
- GFX_FW_TYPE_RLC_V = 7,
- GFX_FW_TYPE_RLC_G = 8,
- GFX_FW_TYPE_SDMA0 = 9,
- GFX_FW_TYPE_SDMA1 = 10,
- GFX_FW_TYPE_DMCU_ERAM = 11,
- GFX_FW_TYPE_DMCU_ISR = 12,
- GFX_FW_TYPE_VCN = 13,
- GFX_FW_TYPE_UVD = 14,
- GFX_FW_TYPE_VCE = 15,
- GFX_FW_TYPE_ISP = 16,
- GFX_FW_TYPE_ACP = 17,
- GFX_FW_TYPE_SMU = 18,
- GFX_FW_TYPE_MMSCH = 19,
- GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
- GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
- GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22,
- GFX_FW_TYPE_UVD1 = 23,
- GFX_FW_TYPE_MAX = 24
+enum psp_gfx_fw_type {
+ GFX_FW_TYPE_NONE = 0, /* */
+ GFX_FW_TYPE_CP_ME = 1, /* CP-ME VG + RV */
+ GFX_FW_TYPE_CP_PFP = 2, /* CP-PFP VG + RV */
+ GFX_FW_TYPE_CP_CE = 3, /* CP-CE VG + RV */
+ GFX_FW_TYPE_CP_MEC = 4, /* CP-MEC FW VG + RV */
+ GFX_FW_TYPE_CP_MEC_ME1 = 5, /* CP-MEC Jump Table 1 VG + RV */
+ GFX_FW_TYPE_CP_MEC_ME2 = 6, /* CP-MEC Jump Table 2 VG */
+ GFX_FW_TYPE_RLC_V = 7, /* RLC-V VG */
+ GFX_FW_TYPE_RLC_G = 8, /* RLC-G VG + RV */
+ GFX_FW_TYPE_SDMA0 = 9, /* SDMA0 VG + RV */
+ GFX_FW_TYPE_SDMA1 = 10, /* SDMA1 VG */
+ GFX_FW_TYPE_DMCU_ERAM = 11, /* DMCU-ERAM VG + RV */
+ GFX_FW_TYPE_DMCU_ISR = 12, /* DMCU-ISR VG + RV */
+ GFX_FW_TYPE_VCN = 13, /* VCN RV */
+ GFX_FW_TYPE_UVD = 14, /* UVD VG */
+ GFX_FW_TYPE_VCE = 15, /* VCE VG */
+ GFX_FW_TYPE_ISP = 16, /* ISP RV */
+ GFX_FW_TYPE_ACP = 17, /* ACP RV */
+ GFX_FW_TYPE_SMU = 18, /* SMU VG */
+ GFX_FW_TYPE_MMSCH = 19, /* MMSCH VG */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, /* RLC GPM VG + RV */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, /* RLC SRM VG + RV */
+ GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, /* RLC CNTL VG + RV */
+ GFX_FW_TYPE_UVD1 = 23, /* UVD1 VG-20 */
+ GFX_FW_TYPE_TOC = 24, /* TOC NV-10 */
+ GFX_FW_TYPE_RLC_P = 25, /* RLC P NV */
+ GFX_FW_TYPE_RLX6 = 26, /* RLX6 NV */
+ GFX_FW_TYPE_GLOBAL_TAP_DELAYS = 27, /* GLOBAL TAP DELAYS NV */
+ GFX_FW_TYPE_SE0_TAP_DELAYS = 28, /* SE0 TAP DELAYS NV */
+ GFX_FW_TYPE_SE1_TAP_DELAYS = 29, /* SE1 TAP DELAYS NV */
+ GFX_FW_TYPE_GLOBAL_SE0_SE1_SKEW_DELAYS = 30, /* GLOBAL SE0/1 SKEW DELAYS NV */
+ GFX_FW_TYPE_SDMA0_JT = 31, /* SDMA0 JT NV */
+ GFX_FW_TYPE_SDMA1_JT = 32, /* SDNA1 JT NV */
+ GFX_FW_TYPE_CP_MES = 33, /* CP MES NV */
+ GFX_FW_TYPE_MES_STACK = 34, /* MES STACK NV */
+ GFX_FW_TYPE_RLC_SRM_DRAM_SR = 35, /* RLC SRM DRAM NV */
+ GFX_FW_TYPE_RLCG_SCRATCH_SR = 36, /* RLCG SCRATCH NV */
+ GFX_FW_TYPE_RLCP_SCRATCH_SR = 37, /* RLCP SCRATCH NV */
+ GFX_FW_TYPE_RLCV_SCRATCH_SR = 38, /* RLCV SCRATCH NV */
+ GFX_FW_TYPE_RLX6_DRAM_SR = 39, /* RLX6 DRAM NV */
+ GFX_FW_TYPE_SDMA0_PG_CONTEXT = 40, /* SDMA0 PG CONTEXT NV */
+ GFX_FW_TYPE_SDMA1_PG_CONTEXT = 41, /* SDMA1 PG CONTEXT NV */
+ GFX_FW_TYPE_GLOBAL_MUX_SELECT_RAM = 42, /* GLOBAL MUX SEL RAM NV */
+ GFX_FW_TYPE_SE0_MUX_SELECT_RAM = 43, /* SE0 MUX SEL RAM NV */
+ GFX_FW_TYPE_SE1_MUX_SELECT_RAM = 44, /* SE1 MUX SEL RAM NV */
+ GFX_FW_TYPE_ACCUM_CTRL_RAM = 45, /* ACCUM CTRL RAM NV */
+ GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */
+ GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */
+ GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */
+ GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */
+ GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */
+ GFX_FW_TYPE_MAX
};
/* Command to load HW IP FW. */
@@ -217,6 +258,20 @@ struct psp_gfx_cmd_save_restore_ip_fw
enum psp_gfx_fw_type fw_type; /* FW type */
};
+/* Command to setup register program */
+struct psp_gfx_cmd_reg_prog {
+ uint32_t reg_value;
+ uint32_t reg_id;
+};
+
+/* Command to load TOC */
+struct psp_gfx_cmd_load_toc
+{
+ uint32_t toc_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+ uint32_t toc_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */
+ uint32_t toc_size; /* FW buffer size in bytes */
+};
+
/* All GFX ring buffer commands. */
union psp_gfx_commands
{
@@ -226,21 +281,24 @@ union psp_gfx_commands
struct psp_gfx_cmd_setup_tmr cmd_setup_tmr;
struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw;
struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
+ struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog;
+ struct psp_gfx_cmd_setup_tmr cmd_setup_vmr;
+ struct psp_gfx_cmd_load_toc cmd_load_toc;
};
-
/* Structure of GFX Response buffer.
* For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI
* it is separate buffer.
*/
struct psp_gfx_resp
{
- uint32_t status; /* +0 status of command execution */
- uint32_t session_id; /* +4 session ID in response to LoadTa command */
- uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */
- uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */
+ uint32_t status; /* +0 status of command execution */
+ uint32_t session_id; /* +4 session ID in response to LoadTa command */
+ uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */
+ uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */
+ uint32_t tmr_size; /* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */
- uint32_t reserved[4];
+ uint32_t reserved[3];
/* total 32 bytes */
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 77c2bc344dfc..ce1ea31feee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -24,6 +24,9 @@
*/
#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index b91df7bd1d98..61744e2d16fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -21,6 +21,8 @@
*/
#include <linux/firmware.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
@@ -39,9 +41,16 @@
MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sos.bin");
+MODULE_FIRMWARE("amdgpu/navi10_asd.bin");
/* address block */
#define smnMP1_FIRMWARE_FLAGS 0x3010024
+/* navi10 reg offset define */
+#define mmRLC_GPM_UCODE_ADDR_NV10 0x5b61
+#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62
+#define mmSDMA0_UCODE_ADDR_NV10 0x5880
+#define mmSDMA0_UCODE_DATA_NV10 0x5881
static int psp_v11_0_init_microcode(struct psp_context *psp)
{
@@ -50,6 +59,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
char fw_name[30];
int err = 0;
const struct psp_firmware_header_v1_0 *sos_hdr;
+ const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;
const struct psp_firmware_header_v1_0 *asd_hdr;
const struct ta_firmware_header_v1_0 *ta_hdr;
@@ -59,6 +69,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
case CHIP_VEGA20:
chip_name = "vega20";
break;
+ case CHIP_NAVI10:
+ chip_name = "navi10";
+ break;
default:
BUG();
}
@@ -73,15 +86,31 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
goto out;
sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
- adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
- adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
- adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
- adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) -
- le32_to_cpu(sos_hdr->sos_size_bytes);
- adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
+ amdgpu_ucode_print_psp_hdr(&sos_hdr->header);
+
+ switch (sos_hdr->header.header_version_major) {
+ case 1:
+ adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version);
+ adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version);
+ adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes);
+ adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes);
+ adev->psp.sys_start_addr = (uint8_t *)sos_hdr +
le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes);
- adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
le32_to_cpu(sos_hdr->sos_offset_bytes);
+ if (sos_hdr->header.header_version_minor == 1) {
+ sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data;
+ adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes);
+ adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unsupported psp sos firmware\n");
+ err = -EINVAL;
+ goto out;
+ }
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
@@ -99,30 +128,36 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
- err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
- if (err) {
- release_firmware(adev->psp.ta_fw);
- adev->psp.ta_fw = NULL;
- dev_info(adev->dev,
- "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
- } else {
- err = amdgpu_ucode_validate(adev->psp.ta_fw);
- if (err)
- goto out2;
-
- ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
- adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
- adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
- adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
- le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
-
- adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
-
- adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version);
- adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes);
- adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr +
- le32_to_cpu(ta_hdr->ta_ras_offset_bytes);
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
+ err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
+ if (err) {
+ release_firmware(adev->psp.ta_fw);
+ adev->psp.ta_fw = NULL;
+ dev_info(adev->dev,
+ "psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+ } else {
+ err = amdgpu_ucode_validate(adev->psp.ta_fw);
+ if (err)
+ goto out2;
+
+ ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
+ adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
+ adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes);
+ adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr +
+ le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
+ adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version);
+ adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version);
+ adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes);
+ adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr +
+ le32_to_cpu(ta_hdr->ta_ras_offset_bytes);
+ }
+ break;
+ case CHIP_NAVI10:
+ break;
+ default:
+ BUG();
}
return 0;
@@ -499,14 +534,24 @@ psp_v11_0_sram_map(struct amdgpu_device *adev,
case AMDGPU_UCODE_ID_RLC_G:
*sram_offset = 0x2000;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
+ if (adev->asic_type < CHIP_NAVI10) {
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
+ } else {
+ *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_ADDR_NV10;
+ *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_DATA_NV10;
+ }
break;
case AMDGPU_UCODE_ID_SDMA0:
*sram_offset = 0x0;
- *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
- *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
+ if (adev->asic_type < CHIP_NAVI10) {
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
+ } else {
+ *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_ADDR_NV10;
+ *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_DATA_NV10;
+ }
break;
/* TODO: needs to confirm */
@@ -770,6 +815,11 @@ static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr
#endif
}
+static int psp_v11_0_rlc_autoload_start(struct psp_context *psp)
+{
+ return psp_rlc_autoload_start(psp);
+}
+
static const struct psp_funcs psp_v11_0_funcs = {
.init_microcode = psp_v11_0_init_microcode,
.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
@@ -788,6 +838,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
.support_vmr_ring = psp_v11_0_support_vmr_ring,
.ras_trigger_error = psp_v11_0_ras_trigger_error,
.ras_cure_posion = psp_v11_0_ras_cure_posion,
+ .rlc_autoload_start = psp_v11_0_rlc_autoload_start,
};
void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 143f0fae69d5..2ea772692037 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -24,7 +24,9 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
@@ -50,6 +52,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+
static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -296,27 +302,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
psp_v3_1_reroute_ih(psp);
- /* Write low address of the ring to C2PMSG_69 */
- psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
- /* Write high address of the ring to C2PMSG_70 */
- psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
- /* Write size of ring to C2PMSG_71 */
- psp_ring_reg = ring->ring_size;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
- /* Write the ring initialization command to C2PMSG_64 */
- psp_ring_reg = ring_type;
- psp_ring_reg = psp_ring_reg << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x8000FFFF, false);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ ret = psp_v3_1_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+ /* No size initialization for sriov */
+ /* Write the ring initialization command to C2PMSG_101 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_101), 0x80000000,
+ 0x8000FFFF, false);
+ } else {
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be hardware handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+ mmMP0_SMN_C2PMSG_64), 0x80000000,
+ 0x8000FFFF, false);
+ }
return ret;
}
@@ -327,16 +363,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
unsigned int psp_ring_reg = 0;
struct amdgpu_device *adev = psp->adev;
- /* Write the ring destroy command to C2PMSG_64 */
- psp_ring_reg = 3 << 16;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
- /* there might be handshake issue with hardware which needs delay */
- mdelay(20);
-
- /* Wait for response flag (bit 31) in C2PMSG_64 */
- ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
- 0x80000000, 0x80000000, false);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ /* Write the Destroy GPCOM ring command to C2PMSG_101 */
+ psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+ /* there might be handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command to C2PMSG_64 */
+ psp_ring_reg = 3 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be handshake issue which needs delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
return ret;
}
@@ -375,7 +426,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
- psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+ if (psp_v3_1_support_vmr_ring(psp))
+ psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+ else
+ psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
/* Update KM RB frame pointer to new frame */
/* write_frame ptr increments by size of rb_frame in bytes */
@@ -404,7 +458,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
/* Update the write Pointer in DWORDs */
psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
- WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
+ if (psp_v3_1_support_vmr_ring(psp)) {
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
+ /* send interrupt to PSP for SRIOV ring write pointer update */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
return 0;
}
@@ -574,6 +634,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
return 0;
}
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
+{
+ if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+ return true;
+
+ return false;
+}
+
static const struct psp_funcs psp_v3_1_funcs = {
.init_microcode = psp_v3_1_init_microcode,
.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -586,6 +654,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
.compare_sram_data = psp_v3_1_compare_sram_data,
.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
.mode1_reset = psp_v3_1_mode1_reset,
+ .support_vmr_ring = psp_v3_1_support_vmr_ring,
};
void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 36196372e8db..a10175838013 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -21,8 +21,11 @@
*
* Authors: Alex Deucher
*/
+
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -574,7 +577,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 6d39544e7829..5f4e2c616241 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -21,8 +21,11 @@
*
* Authors: Alex Deucher
*/
+
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -846,7 +849,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9c88ce513d78..4428018672d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -21,8 +21,11 @@
*
*/
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
@@ -210,12 +213,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
- soc15_program_register_sequence(adev,
- golden_settings_sdma_4,
- ARRAY_SIZE(golden_settings_sdma_4));
- soc15_program_register_sequence(adev,
- golden_settings_sdma_vg10,
- ARRAY_SIZE(golden_settings_sdma_vg10));
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg10,
+ ARRAY_SIZE(golden_settings_sdma_vg10));
+ }
break;
case CHIP_VEGA12:
soc15_program_register_sequence(adev,
@@ -1090,7 +1095,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
static int sdma_v4_0_start(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring;
- int i, r;
+ int i, r = 0;
if (amdgpu_sriov_vf(adev)) {
sdma_v4_0_ctx_switch_enable(adev, false);
@@ -1207,7 +1212,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -1521,8 +1526,25 @@ static int sdma_v4_0_late_init(void *handle)
}
/* handle resume path. */
- if (*ras_if)
+ if (*ras_if) {
+ /* resend ras TA enable cmd during resume.
+ * prepare to handle failure.
+ */
+ ih_info.head = **ras_if;
+ r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+ if (r) {
+ if (r == -EAGAIN) {
+ /* request a gpu reset. will run again. */
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__SDMA);
+ return 0;
+ }
+ /* fail to enable ras, cleanup all. */
+ goto irq;
+ }
+ /* enable successfully. continue. */
goto resume;
+ }
*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
if (!*ras_if)
@@ -1531,8 +1553,14 @@ static int sdma_v4_0_late_init(void *handle)
**ras_if = ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
- if (r)
+ if (r) {
+ if (r == -EAGAIN) {
+ amdgpu_ras_request_reset_on_boot(adev,
+ AMDGPU_RAS_BLOCK__SDMA);
+ r = 0;
+ }
goto feature;
+ }
ih_info.head = **ras_if;
fs_info.head = **ras_if;
@@ -1541,9 +1569,7 @@ static int sdma_v4_0_late_init(void *handle)
if (r)
goto interrupt;
- r = amdgpu_ras_debugfs_create(adev, &fs_info);
- if (r)
- goto debugfs;
+ amdgpu_ras_debugfs_create(adev, &fs_info);
r = amdgpu_ras_sysfs_create(adev, &fs_info);
if (r)
@@ -1564,14 +1590,13 @@ irq:
amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
amdgpu_ras_feature_enable(adev, *ras_if, 0);
feature:
kfree(*ras_if);
*ras_if = NULL;
- return -EINVAL;
+ return r;
}
static int sdma_v4_0_sw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
new file mode 100644
index 000000000000..3747c3f1f0cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -0,0 +1,1687 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_trace.h"
+
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "hdp/hdp_5_0_0_offset.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h"
+
+#include "soc15_common.h"
+#include "soc15.h"
+#include "navi10_sdma_pkt_open.h"
+#include "nbio_v2_3.h"
+#include "sdma_v5_0.h"
+
+MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");
+MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin");
+
+#define SDMA1_REG_OFFSET 0x600
+#define SDMA0_HYP_DEC_REG_START 0x5880
+#define SDMA0_HYP_DEC_REG_END 0x5893
+#define SDMA1_HYP_DEC_REG_OFFSET 0x20
+
+static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev);
+static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev);
+
+static const struct soc15_reg_golden golden_settings_sdma_5[] = {
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00)
+};
+
+static const struct soc15_reg_golden golden_settings_sdma_nv10[] = {
+};
+
+static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
+{
+ u32 base;
+
+ if (internal_offset >= SDMA0_HYP_DEC_REG_START &&
+ internal_offset <= SDMA0_HYP_DEC_REG_END) {
+ base = adev->reg_offset[GC_HWIP][0][1];
+ if (instance == 1)
+ internal_offset += SDMA1_HYP_DEC_REG_OFFSET;
+ } else {
+ base = adev->reg_offset[GC_HWIP][0][0];
+ if (instance == 1)
+ internal_offset += SDMA1_REG_OFFSET;
+ }
+
+ return base + internal_offset;
+}
+
+static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)
+{
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_5,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_5));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_nv10,
+ (const u32)ARRAY_SIZE(golden_settings_sdma_nv10));
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * sdma_v5_0_init_microcode - load ucode images from disk
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Use the firmware interface to load the ucode images into
+ * the driver (not loaded into hw).
+ * Returns 0 on success, error on failure.
+ */
+
+// emulation only, won't work on real chip
+// navi10 real chip need to use PSP to load firmware
+static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
+{
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0, i;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct common_firmware_header *header = NULL;
+ const struct sdma_firmware_header_v1_0 *hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ chip_name = "navi10";
+ break;
+ default:
+ BUG();
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (i == 0)
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+ else
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
+ err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+ err = amdgpu_ucode_validate(adev->sdma.instance[i].fw);
+ if (err)
+ goto out;
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma.instance[i].feature_version >= 20)
+ adev->sdma.instance[i].burst_nop = true;
+ DRM_DEBUG("psp_load == '%s'\n",
+ adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+out:
+ if (err) {
+ DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
+ }
+ }
+ return err;
+}
+
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+{
+ unsigned ret;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+ amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+ amdgpu_ring_write(ring, 1);
+ ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
+ amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+
+ return ret;
+}
+
+static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
+ unsigned offset)
+{
+ unsigned cur;
+
+ BUG_ON(offset > ring->buf_mask);
+ BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+ cur = (ring->wptr - 1) & ring->buf_mask;
+ if (cur > offset)
+ ring->ring[offset] = cur - offset;
+ else
+ ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
+}
+
+/**
+ * sdma_v5_0_ring_get_rptr - get the current read pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current rptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ u64 *rptr;
+
+ /* XXX check if swapping is necessary on BE */
+ rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+
+ DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
+ return ((*rptr) >> 2);
+}
+
+/**
+ * sdma_v5_0_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (NAVI10+).
+ */
+static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u64 *wptr = NULL;
+ uint64_t local_wptr = 0;
+
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
+ *wptr = (*wptr) >> 2;
+ DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+ } else {
+ u32 lowbit, highbit;
+
+ wptr = &local_wptr;
+ lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
+ highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+
+ DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
+ ring->me, highbit, lowbit);
+ *wptr = highbit;
+ *wptr = (*wptr) << 32;
+ *wptr |= lowbit;
+ }
+
+ return *wptr;
+}
+
+/**
+ * sdma_v5_0_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (NAVI10+).
+ */
+static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ DRM_DEBUG("Setting write pointer\n");
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
+ adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
+}
+
+static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ amdgpu_ring_write(ring, ring->funcs->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
+/**
+ * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine
+ *
+ * @ring: amdgpu ring pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
+
+ /* IB packet must end on a 8 DW boundary */
+ sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
+ SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
+ /* base must be 32 byte aligned */
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+ amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
+ amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
+}
+
+/**
+ * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Emit an hdp flush packet on the requested DMA ring.
+ */
+static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ u32 ref_and_mask = 0;
+ const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
+
+ if (ring->me == 0)
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
+ else
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+}
+
+/**
+ * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring
+ *
+ * @ring: amdgpu ring pointer
+ * @fence: amdgpu fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
+ /* write the fence */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, lower_32_bits(seq));
+
+ /* optionally write high bits as well */
+ if (write64bit) {
+ addr += 4;
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) |
+ SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
+ /* zero in first two bits */
+ BUG_ON(addr & 0x3);
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, upper_32_bits(seq));
+ }
+
+ /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */
+ if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) {
+ /* generate an interrupt */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
+ amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+ }
+}
+
+
+/**
+ * sdma_v5_0_gfx_stop - stop the gfx async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the gfx async dma ring buffers (NAVI10).
+ */
+static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring;
+ struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring;
+ u32 rb_cntl, ib_cntl;
+ int i;
+
+ if ((adev->mman.buffer_funcs_ring == sdma0) ||
+ (adev->mman.buffer_funcs_ring == sdma1))
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ }
+
+ sdma0->sched.ready = false;
+ sdma1->sched.ready = false;
+}
+
+/**
+ * sdma_v5_0_rlc_stop - stop the compute async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the compute async dma queues (NAVI10).
+ */
+static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev)
+{
+ /* XXX todo */
+}
+
+/**
+ * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs context switch.
+ *
+ * Halt or unhalt the async dma engines context switch (NAVI10).
+ */
+static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl, phase_quantum = 0;
+ int i;
+
+ if (amdgpu_sdma_phase_quantum) {
+ unsigned value = amdgpu_sdma_phase_quantum;
+ unsigned unit = 0;
+
+ while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+ value = (value + 1) >> 1;
+ unit++;
+ }
+ if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+ value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+ SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+ unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+ SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+ WARN_ONCE(1,
+ "clamping sdma_phase_quantum to %uK clock cycles\n",
+ value << unit);
+ }
+ phase_quantum =
+ value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+ unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ phase_quantum);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ phase_quantum);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ phase_quantum);
+ }
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
+
+}
+
+/**
+ * sdma_v5_0_enable - stop the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (NAVI10).
+ */
+static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable)
+{
+ u32 f32_cntl;
+ int i;
+
+ if (enable == false) {
+ sdma_v5_0_gfx_stop(adev);
+ sdma_v5_0_rlc_stop(adev);
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+ }
+}
+
+/**
+ * sdma_v5_0_gfx_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ u32 rb_cntl, ib_cntl;
+ u32 rb_bufsz;
+ u32 wb_offset;
+ u32 doorbell;
+ u32 doorbell_offset;
+ u32 temp;
+ u32 wptr_poll_cntl;
+ u64 wptr_gpu_addr;
+ int i, r;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ wb_offset = (ring->rptr_offs * 4);
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i,
+ mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+ SDMA0_GFX_RB_WPTR_POLL_CNTL,
+ F32_POLL_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL),
+ wptr_poll_cntl);
+
+ /* set the wb address whether it's enabled or not */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+
+ ring->wptr = 0;
+
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
+
+ doorbell = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+
+ adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index, 20);
+
+ if (amdgpu_sriov_vf(adev))
+ sdma_v5_0_ring_set_wptr(ring);
+
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+
+ /* enable MCBP */
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+ /* Set up RESP_MODE to non-copy addresses */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
+ temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp);
+
+ /* program default cache read and write policy */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE));
+ /* clean read policy and write policy bits */
+ temp &= 0xFF0FFF;
+ temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
+
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
+
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+
+ ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+
+ ring->sched.ready = true;
+
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+ sdma_v5_0_enable(adev, true);
+ }
+
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ return r;
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_0_rlc_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the compute DMA queues and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev)
+{
+ return 0;
+}
+
+/**
+ * sdma_v5_0_load_microcode - load the sDMA ME ucode
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int sdma_v5_0_load_microcode(struct amdgpu_device *adev)
+{
+ const struct sdma_firmware_header_v1_0 *hdr;
+ const __le32 *fw_data;
+ u32 fw_size;
+ int i, j;
+
+ /* halt the MEs */
+ sdma_v5_0_enable(adev, false);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (!adev->sdma.instance[i].fw)
+ return -EINVAL;
+
+ hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
+ amdgpu_ucode_print_sdma_hdr(&hdr->header);
+ fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
+
+ fw_data = (const __le32 *)
+ (adev->sdma.instance[i].fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+
+ for (j = 0; j < fw_size; j++) {
+ if (amdgpu_emu_mode == 1 && j % 500 == 0)
+ msleep(1);
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+ }
+
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+ }
+
+ return 0;
+}
+
+/**
+ * sdma_v5_0_start - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Set up the DMA engines and enable them (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_start(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v5_0_ctx_switch_enable(adev, false);
+ sdma_v5_0_enable(adev, false);
+
+ /* set RB registers */
+ r = sdma_v5_0_gfx_resume(adev);
+ return r;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
+ r = sdma_v5_0_load_microcode(adev);
+ if (r)
+ return r;
+
+ /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */
+ if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d)
+ msleep(1000);
+ }
+
+ /* unhalt the MEs */
+ sdma_v5_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v5_0_ctx_switch_enable(adev, true);
+
+ /* start the gfx rings and rlc compute queues */
+ r = sdma_v5_0_gfx_resume(adev);
+ if (r)
+ return r;
+ r = sdma_v5_0_rlc_resume(adev);
+
+ return r;
+}
+
+/**
+ * sdma_v5_0_ring_test_ring - simple async dma engine test
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (NAVI10).
+ * Returns 0 for success, error for failure.
+ */
+static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned i;
+ unsigned index;
+ int r;
+ u32 tmp;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+
+ r = amdgpu_ring_alloc(ring, 5);
+ if (r) {
+ DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
+ amdgpu_device_wb_free(adev, index);
+ return r;
+ }
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
+ amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
+ amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF)
+ break;
+ if (amdgpu_emu_mode == 1)
+ msleep(1);
+ else
+ DRM_UDELAY(1);
+ }
+
+ if (i < adev->usec_timeout) {
+ if (amdgpu_emu_mode == 1)
+ DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i);
+ else
+ DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+ } else {
+ DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+ amdgpu_device_wb_free(adev, index);
+
+ return r;
+}
+
+/**
+ * sdma_v5_0_ring_test_ib - test an IB on the DMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (NAVI10).
+ * Returns 0 on success, error on failure.
+ */
+static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+ unsigned index;
+ long r;
+ u32 tmp = 0;
+ u64 gpu_addr;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ tmp = 0xCAFEDEAD;
+ adev->wb.wb[index] = cpu_to_le32(tmp);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err0;
+ }
+
+ ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib.ptr[1] = lower_32_bits(gpu_addr);
+ ib.ptr[2] = upper_32_bits(gpu_addr);
+ ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
+ ib.length_dw = 8;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err1;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
+ goto err1;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
+ } else {
+ DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
+
+err1:
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
+err0:
+ amdgpu_device_wb_free(adev, index);
+ return r;
+}
+
+
+/**
+ * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @src: src addr to copy from
+ * @count: number of page entries to update
+ *
+ * Update PTEs by copying them from the GART using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count)
+{
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+
+}
+
+/**
+ * sdma_v5_0_vm_write_pte - update PTEs by writing them manually
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update PTEs by writing them manually using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
+{
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw - 1;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
+ }
+}
+
+/**
+ * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA
+ *
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (NAVI10).
+ */
+static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags)
+{
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
+ ib->ptr[ib->length_dw++] = upper_32_bits(flags);
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
+}
+
+/**
+ * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
+ *
+ * @ib: indirect buffer to fill with padding
+ *
+ */
+static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0))
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+}
+
+
+/**
+ * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+
+/**
+ * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA
+ *
+ * @ring: amdgpu_ring pointer
+ * @vm: amdgpu_vm pointer
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (NAVI10).
+ */
+static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
+
+static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
+static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+}
+
+static int sdma_v5_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->sdma.num_instances = 2;
+
+ sdma_v5_0_set_ring_funcs(adev);
+ sdma_v5_0_set_buffer_funcs(adev);
+ sdma_v5_0_set_vm_pte_funcs(adev);
+ sdma_v5_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+
+static int sdma_v5_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int r, i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0,
+ SDMA0_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ /* SDMA trap event */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1,
+ SDMA1_5_0__SRCID__SDMA_TRAP,
+ &adev->sdma.trap_irq);
+ if (r)
+ return r;
+
+ r = sdma_v5_0_init_microcode(adev);
+ if (r) {
+ DRM_ERROR("Failed to load sdma firmware!\n");
+ return r;
+ }
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+ ring->use_doorbell = true;
+
+ DRM_INFO("use_doorbell being set to: [%s]\n",
+ ring->use_doorbell?"true":"false");
+
+ ring->doorbell_index = (i == 0) ?
+ (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset
+ : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset
+
+ sprintf(ring->name, "sdma%d", i);
+ r = amdgpu_ring_init(adev, ring, 1024,
+ &adev->sdma.trap_irq,
+ (i == 0) ?
+ AMDGPU_SDMA_IRQ_INSTANCE0 :
+ AMDGPU_SDMA_IRQ_INSTANCE1);
+ if (r)
+ return r;
+ }
+
+ return r;
+}
+
+static int sdma_v5_0_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+
+ return 0;
+}
+
+static int sdma_v5_0_hw_init(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ sdma_v5_0_init_golden_registers(adev);
+
+ r = sdma_v5_0_start(adev);
+
+ return r;
+}
+
+static int sdma_v5_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ sdma_v5_0_ctx_switch_enable(adev, false);
+ sdma_v5_0_enable(adev, false);
+
+ return 0;
+}
+
+static int sdma_v5_0_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_0_hw_fini(adev);
+}
+
+static int sdma_v5_0_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return sdma_v5_0_hw_init(adev);
+}
+
+static bool sdma_v5_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ u32 i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+
+ if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
+ return false;
+ }
+
+ return true;
+}
+
+static int sdma_v5_0_wait_for_idle(void *handle)
+{
+ unsigned i;
+ u32 sdma0, sdma1;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
+ sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
+
+ if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
+ return 0;
+ udelay(1);
+ }
+ return -ETIMEDOUT;
+}
+
+static int sdma_v5_0_soft_reset(void *handle)
+{
+ /* todo */
+
+ return 0;
+}
+
+static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+ int i, r = 0;
+ struct amdgpu_device *adev = ring->adev;
+ u32 index = 0;
+ u64 sdma_gfx_preempt;
+
+ amdgpu_sdma_get_index_from_ring(ring, &index);
+ if (index == 0)
+ sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT;
+ else
+ sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT;
+
+ /* assert preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+ /* emit the trailing fence */
+ ring->trail_seq += 1;
+ amdgpu_ring_alloc(ring, 10);
+ sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+ ring->trail_seq, 0);
+ amdgpu_ring_commit(ring);
+
+ /* assert IB preemption */
+ WREG32(sdma_gfx_preempt, 1);
+
+ /* poll the trailing fence */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (ring->trail_seq ==
+ le32_to_cpu(*(ring->trail_fence_cpu_addr)))
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ r = -EINVAL;
+ DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
+ }
+
+ /* deassert IB preemption */
+ WREG32(sdma_gfx_preempt, 0);
+
+ /* deassert the preemption condition */
+ amdgpu_ring_set_preempt_cond_exec(ring, true);
+ return r;
+}
+
+static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ u32 sdma_cntl;
+
+ u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ?
+ sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
+ sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
+
+ sdma_cntl = RREG32(reg_offset);
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32(reg_offset, sdma_cntl);
+
+ return 0;
+}
+
+static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: SDMA trap\n");
+ switch (entry->client_id) {
+ case SOC15_IH_CLIENTID_SDMA0:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ case SOC15_IH_CLIENTID_SDMA1:
+ switch (entry->ring_id) {
+ case 0:
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
+ break;
+ case 1:
+ /* XXX compute */
+ break;
+ case 2:
+ /* XXX compute */
+ break;
+ case 3:
+ /* XXX page queue*/
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ return 0;
+}
+
+static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
+ /* Enable sdma clock gating */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ } else {
+ /* Disable sdma clock gating */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
+ data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK |
+ SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data);
+ }
+ }
+}
+
+static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t data, def;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
+ /* Enable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ } else {
+ /* Disable sdma mem light sleep */
+ def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
+ data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
+ if (def != data)
+ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data);
+
+ }
+ }
+}
+
+static int sdma_v5_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ sdma_v5_0_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ sdma_v5_0_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE ? true : false);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int sdma_v5_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ /* AMD_CG_SUPPORT_SDMA_MGCG */
+ data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+ if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK))
+ *flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
+ /* AMD_CG_SUPPORT_SDMA_LS */
+ data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
+ if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
+ *flags |= AMD_CG_SUPPORT_SDMA_LS;
+}
+
+const struct amd_ip_funcs sdma_v5_0_ip_funcs = {
+ .name = "sdma_v5_0",
+ .early_init = sdma_v5_0_early_init,
+ .late_init = NULL,
+ .sw_init = sdma_v5_0_sw_init,
+ .sw_fini = sdma_v5_0_sw_fini,
+ .hw_init = sdma_v5_0_hw_init,
+ .hw_fini = sdma_v5_0_hw_fini,
+ .suspend = sdma_v5_0_suspend,
+ .resume = sdma_v5_0_resume,
+ .is_idle = sdma_v5_0_is_idle,
+ .wait_for_idle = sdma_v5_0_wait_for_idle,
+ .soft_reset = sdma_v5_0_soft_reset,
+ .set_clockgating_state = sdma_v5_0_set_clockgating_state,
+ .set_powergating_state = sdma_v5_0_set_powergating_state,
+ .get_clockgating_state = sdma_v5_0_get_clockgating_state,
+};
+
+static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
+ .type = AMDGPU_RING_TYPE_SDMA,
+ .align_mask = 0xf,
+ .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+ .support_64bit_ptrs = true,
+ .vmhub = AMDGPU_GFXHUB,
+ .get_rptr = sdma_v5_0_ring_get_rptr,
+ .get_wptr = sdma_v5_0_ring_get_wptr,
+ .set_wptr = sdma_v5_0_ring_set_wptr,
+ .emit_frame_size =
+ 5 + /* sdma_v5_0_ring_init_cond_exec */
+ 6 + /* sdma_v5_0_ring_emit_hdp_flush */
+ 3 + /* hdp_invalidate */
+ 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
+ /* sdma_v5_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
+ .emit_ib = sdma_v5_0_ring_emit_ib,
+ .emit_fence = sdma_v5_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
+ .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
+ .emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush,
+ .test_ring = sdma_v5_0_ring_test_ring,
+ .test_ib = sdma_v5_0_ring_test_ib,
+ .insert_nop = sdma_v5_0_ring_insert_nop,
+ .pad_ib = sdma_v5_0_ring_pad_ib,
+ .emit_wreg = sdma_v5_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
+ .preempt_ib = sdma_v5_0_ring_preempt_ib,
+};
+
+static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
+}
+
+static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = {
+ .set = sdma_v5_0_set_trap_irq_state,
+ .process = sdma_v5_0_process_trap_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {
+ .process = sdma_v5_0_process_illegal_inst_irq,
+};
+
+static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;
+}
+
+/**
+ * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Copy GPU buffers using the DMA engine (NAVI10).
+ * Used by the amdgpu ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
+ uint64_t src_offset,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+}
+
+/**
+ * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @src_data: value to write to buffer
+ * @dst_offset: dst GPU address
+ * @byte_count: number of bytes to xfer
+ *
+ * Fill GPU buffers using the DMA engine (NAVI10).
+ */
+static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib,
+ uint32_t src_data,
+ uint64_t dst_offset,
+ uint32_t byte_count)
+{
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ ib->ptr[ib->length_dw++] = src_data;
+ ib->ptr[ib->length_dw++] = byte_count - 1;
+}
+
+static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = {
+ .copy_max_bytes = 0x400000,
+ .copy_num_dw = 7,
+ .emit_copy_buffer = sdma_v5_0_emit_copy_buffer,
+
+ .fill_max_bytes = 0x400000,
+ .fill_num_dw = 5,
+ .emit_fill_buffer = sdma_v5_0_emit_fill_buffer,
+};
+
+static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev)
+{
+ if (adev->mman.buffer_funcs == NULL) {
+ adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
+ }
+}
+
+static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = {
+ .copy_pte_num_dw = 7,
+ .copy_pte = sdma_v5_0_vm_copy_pte,
+ .write_pte = sdma_v5_0_vm_write_pte,
+ .set_pte_pde = sdma_v5_0_vm_set_pte_pde,
+};
+
+static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev)
+{
+ struct drm_gpu_scheduler *sched;
+ unsigned i;
+
+ if (adev->vm_manager.vm_pte_funcs == NULL) {
+ adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ }
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
+ }
+}
+
+const struct amdgpu_ip_block_version sdma_v5_0_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_SDMA,
+ .major = 5,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &sdma_v5_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
new file mode 100644
index 000000000000..d5a94e3d181c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __SDMA_V5_0_H__
+#define __SDMA_V5_0_H__
+
+enum sdma_v5_0_utcl2_cache_read_policy {
+ CACHE_READ_POLICY_L2__LRU = 0x00000000,
+ CACHE_READ_POLICY_L2__STREAM = 0x00000001,
+ CACHE_READ_POLICY_L2__NOA = 0x00000002,
+ CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA,
+};
+
+enum sdma_v5_0_utcl2_cache_write_policy {
+ CACHE_WRITE_POLICY_L2__LRU = 0x00000000,
+ CACHE_WRITE_POLICY_L2__STREAM = 0x00000001,
+ CACHE_WRITE_POLICY_L2__NOA = 0x00000002,
+ CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003,
+ CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS,
+};
+
+extern const struct amd_ip_funcs sdma_v5_0_ip_funcs;
+extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block;
+
+#endif /* __SDMA_V5_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9d8df68893b9..4d74453f3cfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -24,7 +24,8 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -1339,8 +1340,8 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
- if (adev->flags & AMD_IS_APU)
- return;
+ if (adev->flags & AMD_IS_APU)
+ return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
@@ -1375,6 +1376,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
+static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@@ -1393,6 +1406,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.need_full_reset = &si_need_full_reset,
.get_pcie_usage = &si_get_pcie_usage,
.need_reset_on_init = &si_need_reset_on_init,
+ .get_pcie_replay_count = &si_get_pcie_replay_count,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 3eeefd40dae0..bdda8b4e03f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -21,7 +21,7 @@
*
* Authors: Alex Deucher
*/
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
@@ -230,7 +230,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index d57e75e5c71f..4cb4c891120b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -21,7 +21,9 @@
*
*/
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 8c50c9cab455..57bb5f9e08b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "sid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c
index 4a2fd8b61940..8f994ffa9cd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c
@@ -23,7 +23,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "sid.h"
#include "ppsmc.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index b7e594c2bfb4..87152d8ef0df 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -23,7 +23,8 @@
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -44,6 +45,7 @@
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
#include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
#include "nbio/nbio_7_0_sh_mask.h"
#include "nbio/nbio_7_0_smn.h"
#include "mp/mp_9_0_offset.h"
@@ -64,6 +66,9 @@
#include "dce_virtual.h"
#include "mxgpu_ai.h"
#include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
#define mmMP0_MISC_CGTT_CTRL0 0x01b9
#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
@@ -230,7 +235,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
- WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
+ WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
}
static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
@@ -270,15 +275,6 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,
return true;
}
-struct soc15_allowed_register_entry {
- uint32_t hwip;
- uint32_t inst;
- uint32_t seg;
- uint32_t reg_offset;
- bool grbm_indexed;
-};
-
-
static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = {
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)},
{ SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)},
@@ -383,9 +379,17 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
} else {
tmp = RREG32(reg);
tmp &= ~(entry->and_mask);
- tmp |= entry->or_mask;
+ tmp |= (entry->or_mask & entry->and_mask);
}
- WREG32(reg, tmp);
+
+ if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
+ reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
+ WREG32_RLC(reg, tmp);
+ else
+ WREG32(reg, tmp);
+
}
}
@@ -475,6 +479,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
soc15_asic_get_baco_capability(adev, &baco_reset);
else
baco_reset = false;
+ if (baco_reset) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ if (hive || (ras && ras->supported))
+ baco_reset = false;
+ }
break;
default:
baco_reset = false;
@@ -606,12 +617,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
- if (adev->asic_type == CHIP_VEGA20)
- amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
- else
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+
+ /* For Vega10 SR-IOV, PSP need to be initialized before IH */
+ if (amdgpu_sriov_vf(adev)) {
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ }
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ } else {
+ amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+ if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ }
}
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
@@ -690,8 +713,8 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
- if (adev->flags & AMD_IS_APU)
- return;
+ if (adev->flags & AMD_IS_APU)
+ return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
@@ -733,7 +756,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
/* Just return false for soc15 GPUs. Reset does not seem to
* be necessary.
*/
- return false;
+ if (!amdgpu_passthrough(adev))
+ return false;
if (adev->flags & AMD_IS_APU)
return false;
@@ -748,6 +772,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
return false;
}
+static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@@ -765,6 +801,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.init_doorbell_index = &vega10_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -784,12 +821,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.init_doorbell_index = &vega20_doorbell_index_init,
.get_pcie_usage = &soc15_get_pcie_usage,
.need_reset_on_init = &soc15_need_reset_on_init,
+ .get_pcie_replay_count = &soc15_get_pcie_replay_count,
};
static int soc15_common_early_init(void *handle)
{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
adev->smc_rreg = NULL;
adev->smc_wreg = NULL;
adev->pcie_rreg = &soc15_pcie_rreg;
@@ -985,6 +1026,8 @@ static int soc15_common_sw_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_add_irq_id(adev);
+ adev->df_funcs->sw_init(adev);
+
return 0;
}
@@ -998,11 +1041,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
int i;
struct amdgpu_ring *ring;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- adev->nbio_funcs->sdma_doorbell_range(adev, i,
- ring->use_doorbell, ring->doorbell_index,
- adev->doorbell_index.sdma_doorbell_range);
+ /* Two reasons to skip
+ * 1, Host driver already programmed them
+ * 2, To avoid registers program violations in SR-IOV
+ */
+ if (!amdgpu_virt_support_skip_setting(adev)) {
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ adev->nbio_funcs->sdma_doorbell_range(adev, i,
+ ring->use_doorbell, ring->doorbell_index,
+ adev->doorbell_index.sdma_doorbell_range);
+ }
}
adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
@@ -1019,6 +1068,13 @@ static int soc15_common_hw_init(void *handle)
soc15_program_aspm(adev);
/* setup nbio registers */
adev->nbio_funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio_funcs->remap_hdp_registers)
+ adev->nbio_funcs->remap_hdp_registers(adev);
+
/* enable the doorbell aperture */
soc15_enable_doorbell_aperture(adev, true);
/* HW doorbell routing policy: doorbell writing not
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a66c8bfbbaa6..7a6b2cc6d9f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -42,8 +42,28 @@ struct soc15_reg_golden {
u32 or_mask;
};
+struct soc15_reg_entry {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ uint32_t se_num;
+ uint32_t instance;
+};
+
+struct soc15_allowed_register_entry {
+ uint32_t hwip;
+ uint32_t inst;
+ uint32_t seg;
+ uint32_t reg_offset;
+ bool grbm_indexed;
+};
+
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+
#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 49c262540940..47f74dab365d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,26 +69,60 @@
} \
} while (0)
-#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
- ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
- UVD_DPG_LMA_CTL__MASK_EN_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
- << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
- (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
- RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
+#define WREG32_RLC(reg, value) \
+ do { \
+ if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ uint32_t i = 0; \
+ uint32_t retries = 50000; \
+ uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
+ uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1; \
+ uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT; \
+ WREG32(r0, value); \
+ WREG32(r1, (reg | 0x80000000)); \
+ WREG32(spare_int, 0x1); \
+ for (i = 0; i < retries; i++) { \
+ u32 tmp = RREG32(r1); \
+ if (!(tmp & 0x80000000)) \
+ break; \
+ udelay(10); \
+ } \
+ if (i >= retries) \
+ pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg); \
+ } else { \
+ WREG32(reg, value); \
+ } \
+ } while (0)
-#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
- WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
- UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
- ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
- << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
- (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+ if (amdgpu_virt_support_rlc_prg_reg(adev)) { \
+ uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
+ uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
+ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
+ uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; \
+ if (target_reg == grbm_cntl) \
+ WREG32(r2, value); \
+ else if (target_reg == grbm_idx) \
+ WREG32(r3, value); \
+ WREG32(target_reg, value); \
+ } else { \
+ WREG32(target_reg, value); \
+ } \
} while (0)
-#endif
+#define WREG32_SOC15_RLC(ip, inst, reg, value) \
+ do { \
+ uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
+ WREG32_RLC(target_reg, value); \
+ } while (0)
+
+#define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \
+ WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+ (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
+ & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
+ WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 0b4e7b55595a..ca7d05993ca2 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -1,29 +1,26 @@
-/****************************************************************************\
-*
-* File Name ta_ras_if.h
-* Project AMD PSP SW IP Module
-*
-* Description Interface to the RAS Trusted Application
-*
-* Copyright 2019 Advanced Micro Devices, Inc.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy of this software
-* and associated documentation files (the "Software"), to deal in the Software without restriction,
-* including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
-* subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all copies or substantial
-* portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-* OTHER DEALINGS IN THE SOFTWARE.
-*/
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
#ifndef _TA_RAS_IF_H
#define _TA_RAS_IF_H
@@ -31,8 +28,8 @@
#define RSP_ID_MASK (1U << 31)
#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK)
-#define TA_NUM_BLOCK_MAX 14
-
+/* RAS related enumerations */
+/**********************************************************/
enum ras_command {
TA_RAS_COMMAND__ENABLE_FEATURES = 0,
TA_RAS_COMMAND__DISABLE_FEATURES,
@@ -45,7 +42,12 @@ enum ta_ras_status {
TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02,
TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03,
TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04,
- TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05
+ TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05,
+ TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0x06,
+ TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0x07,
+ TA_RAS_STATUS__ERROR_TIMEOUT = 0x08,
+ TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0x09,
+ TA_RAS_STATUS__ERROR_GENERIC = 0x10,
};
enum ta_ras_block {
@@ -62,47 +64,55 @@ enum ta_ras_block {
TA_RAS_BLOCK__SEM,
TA_RAS_BLOCK__MP0,
TA_RAS_BLOCK__MP1,
- TA_RAS_BLOCK__FUSE = (TA_NUM_BLOCK_MAX - 1),
+ TA_RAS_BLOCK__FUSE,
+ TA_NUM_BLOCK_MAX
};
enum ta_ras_error_type {
- TA_RAS_ERROR__NONE = 0,
- TA_RAS_ERROR__PARITY = 1,
- TA_RAS_ERROR__SINGLE_CORRECTABLE = 2,
- TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
- TA_RAS_ERROR__POISON = 8
+ TA_RAS_ERROR__NONE = 0,
+ TA_RAS_ERROR__PARITY = 1,
+ TA_RAS_ERROR__SINGLE_CORRECTABLE = 2,
+ TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
+ TA_RAS_ERROR__POISON = 8,
};
+/* Input/output structures for RAS commands */
+/**********************************************************/
+
struct ta_ras_enable_features_input {
- enum ta_ras_block block_id;
- enum ta_ras_error_type error_type;
+ enum ta_ras_block block_id;
+ enum ta_ras_error_type error_type;
};
struct ta_ras_disable_features_input {
- enum ta_ras_block block_id;
- enum ta_ras_error_type error_type;
+ enum ta_ras_block block_id;
+ enum ta_ras_error_type error_type;
};
struct ta_ras_trigger_error_input {
- enum ta_ras_block block_id;
- enum ta_ras_error_type inject_error_type;
- uint32_t sub_block_index;
- uint64_t address;
- uint64_t value;
+ enum ta_ras_block block_id; // ras-block. i.e. umc, gfx
+ enum ta_ras_error_type inject_error_type; // type of error. i.e. single_correctable
+ uint32_t sub_block_index; // mem block. i.e. hbm, sram etc.
+ uint64_t address; // explicit address of error
+ uint64_t value; // method if error injection. i.e persistent, coherent etc.
};
+/* Common input structure for RAS callbacks */
+/**********************************************************/
union ta_ras_cmd_input {
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
};
+/* Shared Memory structures */
+/**********************************************************/
struct ta_ras_shared_memory {
- uint32_t cmd_id;
- uint32_t resp_id;
- enum ta_ras_status ras_status;
- uint32_t reserved;
- union ta_ras_cmd_input ras_in_message;
+ uint32_t cmd_id;
+ uint32_t resp_id;
+ enum ta_ras_status ras_status;
+ uint32_t reserved;
+ union ta_ras_cmd_input ras_in_message;
};
#endif // TL_RAS_IF_H_
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index a20b711a6756..e40140bf6699 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c4fb58667fd4..82abd8e728ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -23,7 +23,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "cikd.h"
@@ -491,7 +491,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v4_2_ring_get_rptr,
.get_wptr = uvd_v4_2_ring_get_wptr,
.set_wptr = uvd_v4_2_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 52bd8a654734..01e62fb8e6e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -22,8 +22,9 @@
* Authors: Christian König <christian.koenig@amd.com>
*/
+#include <linux/delay.h>
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "vid.h"
@@ -506,7 +507,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -849,6 +850,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v5_0_ring_get_rptr,
.get_wptr = uvd_v5_0_ring_get_wptr,
.set_wptr = uvd_v5_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index be70e6e5f9df..670784a78512 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -23,7 +23,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "vid.h"
@@ -186,7 +186,7 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -960,7 +960,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -1505,6 +1505,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1530,6 +1531,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1558,6 +1560,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = uvd_v6_0_enc_ring_get_rptr,
.get_wptr = uvd_v6_0_enc_ring_get_wptr,
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index fc4f0bb9a2e7..a6bfe7651d07 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -22,7 +22,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_uvd.h"
#include "soc15.h"
@@ -194,7 +194,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -1230,7 +1230,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
break;
- DRM_UDELAY(1);
+ udelay(1);
}
if (i >= adev->usec_timeout)
@@ -1762,6 +1762,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
@@ -1794,6 +1795,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = HEVC_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = uvd_v7_0_enc_ring_get_rptr,
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 40363ca6c5f1..b6837fcfdba7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -26,7 +26,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "cikd.h"
@@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v2_0_ring_get_rptr,
.get_wptr = vce_v2_0_ring_get_wptr,
.set_wptr = vce_v2_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6ec65cf11112..475ae68f38f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -26,7 +26,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
@@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
@@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
.align_mask = 0xf,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.get_rptr = vce_v3_0_ring_get_rptr,
.get_wptr = vce_v3_0_ring_get_wptr,
.set_wptr = vce_v3_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index c0ec27991c22..eafbe8d8248d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -25,7 +25,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "soc15.h"
@@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCE_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vce_v4_0_ring_get_rptr,
.get_wptr = vce_v4_0_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 3dbc51f9d3b9..dde22b7d140d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -22,7 +22,7 @@
*/
#include <linux/firmware.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "soc15.h"
@@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state);
/**
* vcn_v1_0_early_init - set function pointers
@@ -126,6 +128,17 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
+ adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = adev->vcn.external.data0 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = adev->vcn.external.data1 =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = adev->vcn.external.cmd =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = adev->vcn.external.nop =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.ring_enc[i];
sprintf(ring->name, "vcn_enc%d", i);
@@ -140,7 +153,11 @@ static int vcn_v1_0_sw_init(void *handle)
if (r)
return r;
- return r;
+ adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+ adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch =
+ SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
}
/**
@@ -1204,6 +1221,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
return r;
}
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state)
+{
+ int ret_code;
+ uint32_t reg_data = 0;
+ uint32_t reg_data2 = 0;
+ struct amdgpu_ring *ring;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG non-jpeg */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg non-jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* Make sure JPRG Snoop is disabled before sending the pause */
+ reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+ /* pause DPG jpeg */
+ reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_jpeg;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.jpeg = new_state->jpeg;
+ }
+
+ return 0;
+}
+
static bool vcn_v1_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2054,6 +2197,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_dec_ring_get_rptr,
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
@@ -2087,6 +2231,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.get_rptr = vcn_v1_0_enc_ring_get_rptr,
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
@@ -2118,6 +2263,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
.align_mask = 0xf,
.nop = PACKET0(0x81ff, 0),
.support_64bit_ptrs = false,
+ .no_user_fence = true,
.vmhub = AMDGPU_MMHUB,
.extra_dw = 64,
.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
new file mode 100644
index 000000000000..988c0adaca91
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -0,0 +1,2261 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_psp.h"
+
+#include "vcn/vcn_2_0_0_offset.h"
+#include "vcn/vcn_2_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x505
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x53f
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x54a
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x1e1
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2
+
+#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff
+#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029
+#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a
+#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea
+#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb
+#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf
+#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8
+#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9
+#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec
+#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed
+#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085
+#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
+#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x026c
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
+static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state);
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state);
+
+/**
+ * vcn_v2_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v2_0_early_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->vcn.num_enc_rings = 2;
+
+ vcn_v2_0_set_dec_ring_funcs(adev);
+ vcn_v2_0_set_enc_ring_funcs(adev);
+ vcn_v2_0_set_jpeg_ring_funcs(adev);
+ vcn_v2_0_set_irq_funcs(adev);
+
+ return 0;
+}
+
+/**
+ * vcn_v2_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v2_0_sw_init(void *handle)
+{
+ struct amdgpu_ring *ring;
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* VCN DEC TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
+ &adev->vcn.irq);
+ if (r)
+ return r;
+
+ /* VCN ENC TRAP */
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
+ &adev->vcn.irq);
+ if (r)
+ return r;
+ }
+
+ /* VCN JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+ VCN_2_0__SRCID__JPEG_DECODE,
+ &adev->vcn.irq);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_init(adev);
+ if (r)
+ return r;
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ DRM_INFO("PSP loading VCN firmware\n");
+ }
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ ring = &adev->vcn.ring_dec;
+
+ ring->use_doorbell = true;
+ ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+
+ sprintf(ring->name, "vcn_dec");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ if (r)
+ return r;
+
+ adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
+ adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);
+ adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
+ adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);
+ adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
+ adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);
+ adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
+ adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);
+ adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
+ adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.ring_enc[i];
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;
+ sprintf(ring->name, "vcn_enc%d", i);
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ if (r)
+ return r;
+ }
+
+ ring = &adev->vcn.ring_jpeg;
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;
+ sprintf(ring->name, "vcn_jpeg");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ if (r)
+ return r;
+
+ adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;
+
+ adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
+ adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);
+
+ return 0;
+}
+
+/**
+ * vcn_v2_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v2_0_sw_fini(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_suspend(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_sw_fini(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v2_0_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ int i, r;
+
+ adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index);
+
+ ring->sched.ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ goto done;
+ }
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.ring_enc[i];
+ ring->sched.ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ goto done;
+ }
+ }
+
+ ring = &adev->vcn.ring_jpeg;
+ ring->sched.ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->sched.ready = false;
+ goto done;
+ }
+
+done:
+ if (!r)
+ DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v2_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ int i;
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
+ vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+ ring->sched.ready = false;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.ring_enc[i];
+ ring->sched.ready = false;
+ }
+
+ ring = &adev->vcn.ring_jpeg;
+ ring->sched.ready = false;
+
+ return 0;
+}
+
+/**
+ * vcn_v2_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v2_0_suspend(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = vcn_v2_0_hw_fini(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_suspend(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v2_0_resume(void *handle)
+{
+ int r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ r = amdgpu_vcn_resume(adev);
+ if (r)
+ return r;
+
+ r = vcn_v2_0_hw_init(adev);
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know it's offsets
+ */
+static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr));
+ offset = size;
+ /* No signed header for now from firmware
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ */
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ }
+
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+
+ /* cache window 1: stack */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr + offset));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+
+ /* cache window 2: context */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
+
+ WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+}
+
+static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
+
+/**
+ * vcn_v2_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @sw: enable SW clock gating
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* UVD disable CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
+ data &= ~(UVD_CGC_GATE__SYS_MASK
+ | UVD_CGC_GATE__UDEC_MASK
+ | UVD_CGC_GATE__MPEG2_MASK
+ | UVD_CGC_GATE__REGS_MASK
+ | UVD_CGC_GATE__RBC_MASK
+ | UVD_CGC_GATE__LMI_MC_MASK
+ | UVD_CGC_GATE__LMI_UMC_MASK
+ | UVD_CGC_GATE__IDCT_MASK
+ | UVD_CGC_GATE__MPRD_MASK
+ | UVD_CGC_GATE__MPC_MASK
+ | UVD_CGC_GATE__LBSI_MASK
+ | UVD_CGC_GATE__LRBBM_MASK
+ | UVD_CGC_GATE__UDEC_RE_MASK
+ | UVD_CGC_GATE__UDEC_CM_MASK
+ | UVD_CGC_GATE__UDEC_IT_MASK
+ | UVD_CGC_GATE__UDEC_DB_MASK
+ | UVD_CGC_GATE__UDEC_MP_MASK
+ | UVD_CGC_GATE__WCB_MASK
+ | UVD_CGC_GATE__VCPU_MASK
+ | UVD_CGC_GATE__SCPU_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ /* turn on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
+ data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+ | UVD_SUVD_CGC_GATE__SIT_MASK
+ | UVD_SUVD_CGC_GATE__SMP_MASK
+ | UVD_SUVD_CGC_GATE__SCM_MASK
+ | UVD_SUVD_CGC_GATE__SDB_MASK
+ | UVD_SUVD_CGC_GATE__SRE_H264_MASK
+ | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_H264_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCM_H264_MASK
+ | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_H264_MASK
+ | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+ | UVD_SUVD_CGC_GATE__SCLR_MASK
+ | UVD_SUVD_CGC_GATE__UVD_SC_MASK
+ | UVD_SUVD_CGC_GATE__ENT_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+ | UVD_SUVD_CGC_GATE__SITE_MASK
+ | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+ | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+ | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+ | UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
+
+/**
+ * jpeg_v2_0_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->vcn.ring_jpeg;
+ uint32_t tmp;
+ int r = 0;
+
+ /* disable power gating */
+ tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
+
+ SOC15_WAIT_ON_RREG(VCN, 0,
+ mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON,
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG disable power gating failed\n");
+ return r;
+ }
+
+ /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1;
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
+
+ /* JPEG disable CGC */
+ tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+ tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
+
+ tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+ tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
+ | JPEG_CGC_GATE__JPEG2_DEC_MASK
+ | JPEG_CGC_GATE__JPEG_ENC_MASK
+ | JPEG_CGC_GATE__JMCIF_MASK
+ | JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
+
+ /* enable JMI channel */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN),
+ JPEG_SYS_INT_EN__DJRBC_MASK,
+ ~JPEG_SYS_INT_EN__DJRBC_MASK);
+
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+
+ return 0;
+}
+
+/**
+ * jpeg_v2_0_stop - stop JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the JPEG block
+ */
+static int jpeg_v2_0_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int r = 0;
+
+ /* reset JMI */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL),
+ UVD_JMI_CNTL__SOFT_RESET_MASK,
+ ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+ /* enable JPEG CGC */
+ tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+ tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp);
+
+
+ tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+ tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
+ |JPEG_CGC_GATE__JPEG2_DEC_MASK
+ |JPEG_CGC_GATE__JPEG_ENC_MASK
+ |JPEG_CGC_GATE__JMCIF_MASK
+ |JPEG_CGC_GATE__JRBBM_MASK);
+ WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp);
+
+ /* enable power gating */
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS));
+ tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK;
+ tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF;
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp);
+
+ tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT;
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp);
+
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
+ (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT),
+ UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r);
+
+ if (r) {
+ DRM_ERROR("amdgpu: JPEG enable power gating failed\n");
+ return r;
+ }
+
+ return r;
+}
+
+/**
+ * vcn_v2_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @sw: enable SW clock gating
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+
+ /* enable UVD CGC */
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+ data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+ | UVD_CGC_CTRL__SYS_MODE_MASK
+ | UVD_CGC_CTRL__UDEC_MODE_MASK
+ | UVD_CGC_CTRL__MPEG2_MODE_MASK
+ | UVD_CGC_CTRL__REGS_MODE_MASK
+ | UVD_CGC_CTRL__RBC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_MC_MODE_MASK
+ | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+ | UVD_CGC_CTRL__IDCT_MODE_MASK
+ | UVD_CGC_CTRL__MPRD_MODE_MASK
+ | UVD_CGC_CTRL__MPC_MODE_MASK
+ | UVD_CGC_CTRL__LBSI_MODE_MASK
+ | UVD_CGC_CTRL__LRBBM_MODE_MASK
+ | UVD_CGC_CTRL__WCB_MODE_MASK
+ | UVD_CGC_CTRL__VCPU_MODE_MASK
+ | UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+ data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+ | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+ WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS,
+ UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret);
+ } else {
+ data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret);
+ }
+
+ /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS,
+ * UVDU_PWR_STATUS are 0 (power on) */
+
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~0x103;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+ data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
+ UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+ uint32_t data = 0;
+ int ret;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+ /* Before power off, this indicator has to be turned on */
+ data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+ data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+ data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+ WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+ data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+ | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT);
+
+ WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+ data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+ | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret);
+ }
+}
+
+static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
+{
+ struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ uint32_t rb_bufsz, tmp;
+
+ vcn_v2_0_enable_static_power_gating(adev);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interupt */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_0_mc_resume_dpg_mode(adev, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* release VCPU reset to boot */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_LMI_CTRL2),
+ 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.dpg_sram_cpu_addr));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ return 0;
+}
+
+static int vcn_v2_0_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+ int i, j, r;
+
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, true);
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram);
+ if (r)
+ return r;
+ goto jpeg;
+ }
+
+ vcn_v2_0_disable_static_power_gating(adev);
+
+ /* set uvd status busy */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
+
+ /*SW clock gating */
+ vcn_v2_0_disable_clock_gating(adev);
+
+ /* enable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* disable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+
+ /* setup mmUVD_MPC_CNTL */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp);
+
+ /* setup UVD_MPC_SET_MUXA0 */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ /* setup UVD_MPC_SET_MUXB0 */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ /* setup mmUVD_MPC_SET_MUX */
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+
+ vcn_v2_0_mc_resume(adev);
+
+ /* release VCPU reset to boot */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+ tmp = RREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET, tmp);
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+#endif
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+
+ for (j = 0; j < 100; ++j) {
+ status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("VCN decode not responding, giving up!!!\n");
+ return r;
+ }
+
+ /* enable master interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+ /* clear the busy bit of VCN_STATUS */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* programm the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+ ring = &adev->vcn.ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+
+jpeg:
+ r = jpeg_v2_0_start(adev);
+
+ return r;
+}
+
+static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev)
+{
+ int ret_code = 0;
+ uint32_t tmp;
+
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v2_0_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int r;
+
+ r = jpeg_v2_0_stop(adev);
+ if (r)
+ return r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_0_stop_dpg_mode(adev);
+ if (r)
+ return r;
+ goto power_off;
+ }
+
+ /* wait for uvd idle */
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
+ if (r)
+ return r;
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+ if (r)
+ return r;
+
+ /* stall UMC channel */
+ tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2);
+ tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
+ WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r);
+ if (r)
+ return r;
+
+ /* disable VCPU clock */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+ ~(UVD_VCPU_CNTL__CLK_EN_MASK));
+
+ /* reset LMI UMC */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ /* reset LMI */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+ /* reset VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ /* clear status */
+ WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
+
+ vcn_v2_0_enable_clock_gating(adev);
+ vcn_v2_0_enable_static_power_gating(adev);
+
+power_off:
+ if (adev->pm.dpm_enabled)
+ amdgpu_dpm_enable_uvd(adev, false);
+
+ return 0;
+}
+
+static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
+
+static bool vcn_v2_0_is_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
+}
+
+static int vcn_v2_0_wait_for_idle(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int ret = 0;
+
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE, ret);
+
+ return ret;
+}
+
+static int vcn_v2_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+
+ if (enable) {
+ /* wait for STATUS to clear */
+ if (vcn_v2_0_is_idle(handle))
+ return -EBUSY;
+ vcn_v2_0_enable_clock_gating(adev);
+ } else {
+ /* disable HW gating and enable Sw gating */
+ vcn_v2_0_disable_clock_gating(adev);
+ }
+ return 0;
+}
+
+/**
+ * vcn_v2_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v2_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v2_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
+}
+
+/**
+ * vcn_v2_0_dec_ring_insert_nop - insert a nop command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a nop command to the ring.
+ */
+static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+/**
+ * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, addr & 0xffffffff);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
+}
+
+/**
+ * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, vmid);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, reg << 2);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, mask);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
+}
+
+static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, reg << 2);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
+
+ amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
+}
+
+/**
+ * vcn_v2_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.ring_enc[0])
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+}
+
+ /**
+ * vcn_v2_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.ring_enc[0]) {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+ } else {
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+ }
+}
+
+ /**
+ * vcn_v2_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring == &adev->vcn.ring_enc[0]) {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+ } else {
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+ }
+ }
+}
+
+/**
+ * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write enc a fence and a trap command to the ring.
+ */
+static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
+ amdgpu_ring_write(ring, addr);
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
+}
+
+static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+}
+
+/**
+ * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
+ amdgpu_ring_write(ring, vmid);
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+ amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned int vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for reg writes */
+ vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
+
+static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell)
+ return adev->wb.wb[ring->wptr_offs];
+ else
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->use_doorbell) {
+ adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+ }
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_insert_end - insert a end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a end command to the ring.
+ */
+static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib,
+ uint32_t flags)
+{
+ unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET,
+ 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
+ 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
+static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned type,
+ enum amdgpu_interrupt_state state)
+{
+ return 0;
+}
+
+static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ DRM_DEBUG("IH: VCN TRAP\n");
+
+ switch (entry->src_id) {
+ case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+ amdgpu_fence_process(&adev->vcn.ring_dec);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+ amdgpu_fence_process(&adev->vcn.ring_enc[0]);
+ break;
+ case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+ amdgpu_fence_process(&adev->vcn.ring_enc[1]);
+ break;
+ case VCN_2_0__SRCID__JPEG_DECODE:
+ amdgpu_fence_process(&adev->vcn.ring_jpeg);
+ break;
+ default:
+ DRM_ERROR("Unhandled interrupt: %d %d\n",
+ entry->src_id, entry->src_data[0]);
+ break;
+ }
+
+ return 0;
+}
+
+static int vcn_v2_0_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* This doesn't actually powergate the VCN block.
+ * That's done in the dpm code via the SMC. This
+ * just re-inits the block as necessary. The actual
+ * gating still happens in the dpm code. We should
+ * revisit this when there is a cleaner line between
+ * the smc and the hw blocks
+ */
+ int ret;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (state == adev->vcn.cur_state)
+ return 0;
+
+ if (state == AMD_PG_STATE_GATE)
+ ret = vcn_v2_0_stop(adev);
+ else
+ ret = vcn_v2_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+ return ret;
+}
+
+static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
+ .name = "vcn_v2_0",
+ .early_init = vcn_v2_0_early_init,
+ .late_init = NULL,
+ .sw_init = vcn_v2_0_sw_init,
+ .sw_fini = vcn_v2_0_sw_fini,
+ .hw_init = vcn_v2_0_hw_init,
+ .hw_fini = vcn_v2_0_hw_fini,
+ .suspend = vcn_v2_0_suspend,
+ .resume = vcn_v2_0_resume,
+ .is_idle = vcn_v2_0_is_idle,
+ .wait_for_idle = vcn_v2_0_wait_for_idle,
+ .check_soft_reset = NULL,
+ .pre_soft_reset = NULL,
+ .soft_reset = NULL,
+ .post_soft_reset = NULL,
+ .set_clockgating_state = vcn_v2_0_set_clockgating_state,
+ .set_powergating_state = vcn_v2_0_set_powergating_state,
+};
+
+static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_DEC,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB,
+ .get_rptr = vcn_v2_0_dec_ring_get_rptr,
+ .get_wptr = vcn_v2_0_dec_ring_get_wptr,
+ .set_wptr = vcn_v2_0_dec_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
+ 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
+ .emit_ib = vcn_v2_0_dec_ring_emit_ib,
+ .emit_fence = vcn_v2_0_dec_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_dec_ring_test_ring,
+ .test_ib = amdgpu_vcn_dec_ring_test_ib,
+ .insert_nop = vcn_v2_0_dec_ring_insert_nop,
+ .insert_start = vcn_v2_0_dec_ring_insert_start,
+ .insert_end = vcn_v2_0_dec_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_ENC,
+ .align_mask = 0x3f,
+ .nop = VCN_ENC_CMD_NO_OP,
+ .vmhub = AMDGPU_MMHUB,
+ .get_rptr = vcn_v2_0_enc_ring_get_rptr,
+ .get_wptr = vcn_v2_0_enc_ring_get_wptr,
+ .set_wptr = vcn_v2_0_enc_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
+ 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
+ 1, /* vcn_v2_0_enc_ring_insert_end */
+ .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
+ .emit_ib = vcn_v2_0_enc_ring_emit_ib,
+ .emit_fence = vcn_v2_0_enc_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_enc_ring_test_ring,
+ .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .insert_nop = amdgpu_ring_insert_nop,
+ .insert_end = vcn_v2_0_enc_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .vmhub = AMDGPU_MMHUB,
+ .get_rptr = vcn_v2_0_jpeg_ring_get_rptr,
+ .get_wptr = vcn_v2_0_jpeg_ring_get_wptr,
+ .set_wptr = vcn_v2_0_jpeg_ring_set_wptr,
+ .emit_frame_size =
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
+ 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
+ 8 + 16,
+ .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
+ .emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
+ .emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
+ .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
+ .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
+ .insert_nop = vcn_v2_0_jpeg_ring_nop,
+ .insert_start = vcn_v2_0_jpeg_ring_insert_start,
+ .insert_end = vcn_v2_0_jpeg_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
+ .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
+static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;
+ DRM_INFO("VCN decode is enabled in VM mode\n");
+}
+
+static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+ int i;
+
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+ adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;
+
+ DRM_INFO("VCN encode is enabled in VM mode\n");
+}
+
+static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;
+ DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {
+ .set = vcn_v2_0_set_interrupt_state,
+ .process = vcn_v2_0_process_interrupt,
+};
+
+static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version vcn_v2_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_VCN,
+ .major = 2,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &vcn_v2_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
new file mode 100644
index 000000000000..a74227f4663b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V2_0_H__
+#define __VCN_V2_0_H__
+
+extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;
+
+#endif /* __VCN_V2_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 8d89ab7f0ae8..22260e6963b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -20,7 +20,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
#include "amdgpu.h"
#include "amdgpu_ih.h"
#include "soc15.h"
@@ -48,14 +50,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
adev->irq.ih.enabled = true;
if (adev->irq.ih1.ring_size) {
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
adev->irq.ih1.enabled = true;
}
@@ -63,7 +80,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
adev->irq.ih2.enabled = true;
}
}
@@ -81,7 +106,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@@ -92,7 +125,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
RB_ENABLE, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -104,7 +145,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
RB_ENABLE, 0);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
+
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
@@ -187,7 +237,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
!!adev->irq.msi_enabled);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+ DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+ }
/* set the writeback address whether it's enabled or not */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -214,7 +272,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
WPTR_OVERFLOW_ENABLE, 0);
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
RB_FULL_DRAIN_ENABLE, 1);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -232,7 +298,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
- WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+
+ if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+ if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+ ih_rb_cntl)) {
+ DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+ return -ETIMEDOUT;
+ }
+ } else {
+ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+ }
/* set rptr, wptr to 0 */
WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5e5b42a0744a..d40ed1a828dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -20,8 +20,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+
+#include <linux/pci.h>
#include <linux/slab.h>
-#include <drm/drmP.h>
+
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_ih.h"
@@ -987,6 +989,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
+static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+ uint64_t nak_r, nak_g;
+
+ /* Get the number of NAKs received and generated */
+ nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+ nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+ /* Add the total number of NAKs, i.e the number of replays */
+ return (nak_r + nak_g);
+}
+
static bool vi_need_reset_on_init(struct amdgpu_device *adev)
{
u32 clock_cntl, pc;
@@ -1021,6 +1035,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &vi_get_pcie_usage,
.need_reset_on_init = &vi_need_reset_on_init,
+ .get_pcie_replay_count = &vi_get_pcie_replay_count,
};
#define CZ_REV_BRISTOL(rev) \