diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
106 files changed, 24040 insertions, 1031 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index a04f2fc7bf37..f6e5c0282fc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,8 +27,7 @@ config DRM_AMDGPU_CIK config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" depends on DRM_AMDGPU - depends on ARCH_HAS_HMM - select HMM_MIRROR + depends on HMM_MIRROR help This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 57ce44cc3226..56e084367b93 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -54,7 +54,9 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o + amdgpu_vm_sdma.o amdgpu_discovery.o + +amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -64,7 +66,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o # add DF block amdgpu-y += \ @@ -75,7 +77,8 @@ amdgpu-y += \ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ - gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \ + gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o # add IH block amdgpu-y += \ @@ -84,7 +87,8 @@ amdgpu-y += \ iceland_ih.o \ tonga_ih.o \ cz_ih.o \ - vega10_ih.o + vega10_ih.o \ + navi10_ih.o # add PSP block amdgpu-y += \ @@ -108,14 +112,20 @@ amdgpu-y += \ amdgpu_gfx.o \ amdgpu_rlc.o \ gfx_v8_0.o \ - gfx_v9_0.o + gfx_v9_0.o \ + gfx_v10_0.o # add async DMA block amdgpu-y += \ amdgpu_sdma.o \ sdma_v2_4.o \ sdma_v3_0.o \ - sdma_v4_0.o + sdma_v4_0.o \ + sdma_v5_0.o + +# add MES block +amdgpu-y += \ + mes_v10_1.o # add UVD block amdgpu-y += \ @@ -133,7 +143,12 @@ amdgpu-y += \ # add VCN block amdgpu-y += \ amdgpu_vcn.o \ - vcn_v1_0.o + vcn_v1_0.o \ + vcn_v2_0.o + +# add ATHUB block +amdgpu-y += \ + athub_v2_0.o # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o @@ -146,7 +161,8 @@ amdgpu-y += \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o \ - amdgpu_amdkfd_gfx_v9.o + amdgpu_amdkfd_gfx_v9.o \ + amdgpu_amdkfd_gfx_v10.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) amdgpu-y += amdgpu_amdkfd_gfx_v7.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cbcd253d18d5..8199d201b43a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -84,6 +84,8 @@ #include "amdgpu_doorbell.h" #include "amdgpu_amdkfd.h" #include "amdgpu_smu.h" +#include "amdgpu_discovery.h" +#include "amdgpu_mes.h" #define MAX_GPU_INSTANCE 16 @@ -142,7 +144,6 @@ extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; -extern int amdgpu_vram_page_split; extern int amdgpu_ngg; extern int amdgpu_prim_buf_per_se; extern int amdgpu_pos_buf_per_se; @@ -155,9 +156,15 @@ extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_dm_abm_level; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; +extern int amdgpu_async_gfx_ring; +extern int amdgpu_mcbp; +extern int amdgpu_discovery; +extern int amdgpu_mes; +extern int amdgpu_noretry; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -213,7 +220,8 @@ struct amdgpu_atif; struct kfd_vm_fault_info; enum amdgpu_cp_irq { - AMDGPU_CP_IRQ_GFX_EOP = 0, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0, + AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP, AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP, @@ -659,6 +667,8 @@ struct amdgpu_nbio_funcs { u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index); void (*enable_doorbell_aperture)(struct amdgpu_device *adev, bool enable); void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, @@ -678,7 +688,7 @@ struct amdgpu_nbio_funcs { }; struct amdgpu_df_funcs { - void (*init)(struct amdgpu_device *adev); + void (*sw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); @@ -729,6 +739,7 @@ struct amd_powerplay { }; #define AMDGPU_RESET_MAGIC_NUM 64 +#define AMDGPU_MAX_DF_PERFMONS 4 struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -755,6 +766,7 @@ struct amdgpu_device { struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; unsigned debugfs_count; #if defined(CONFIG_DEBUG_FS) + struct dentry *debugfs_preempt; struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif struct amdgpu_atif *atif; @@ -764,6 +776,7 @@ struct amdgpu_device { struct mutex grbm_idx_mutex; struct dev_pm_domain vga_pm_domain; bool have_disp_power_ref; + bool have_atomics_support; /* BIOS */ bool is_atom_fw; @@ -905,6 +918,13 @@ struct amdgpu_device { /* display related functionality */ struct amdgpu_display_manager dm; + /* discovery */ + uint8_t *discovery; + + /* mes */ + bool enable_mes; + struct amdgpu_mes mes; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -959,6 +979,7 @@ struct amdgpu_device { long compute_timeout; uint64_t unique_id; + uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1197,5 +1218,24 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev ); static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } #endif + +void amdgpu_register_gpu_instance(struct amdgpu_device *adev); +void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev); + #include "amdgpu_object.h" + +/* used by df_v3_6.c and amdgpu_pmu.c */ +#define AMDGPU_PMU_ATTR(_name, _object) \ +static ssize_t \ +_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \ + return sprintf(page, _object "\n"); \ +} \ + \ +static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) + #endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c8887a1c852a..9fa4f25a3745 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -78,6 +78,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; case CHIP_VEGA10: @@ -86,6 +87,9 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; + case CHIP_NAVI10: + kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); + break; default: dev_info(adev->dev, "kfd not supported on this ASIC\n"); return; @@ -158,7 +162,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* remove the KIQ bit as well */ if (adev->gfx.kiq.ring.sched.ready) - clear_bit(amdgpu_gfx_queue_to_bit(adev, + clear_bit(amdgpu_gfx_mec_queue_to_bit(adev, adev->gfx.kiq.ring.me - 1, adev->gfx.kiq.ring.pipe, adev->gfx.kiq.ring.queue), @@ -436,9 +440,12 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; - else if (adev->powerplay.pp_funcs) - mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; - else + else if (adev->powerplay.pp_funcs) { + if (amdgpu_emu_mode == 1) + mem_info->mem_clk_max = 0; + else + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; + } else mem_info->mem_clk_max = 100; } @@ -661,6 +668,13 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->have_atomics_support; +} + #ifndef CONFIG_HSA_AMD bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) { @@ -701,6 +715,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) return NULL; } +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) +{ + return NULL; +} + struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f968bf147c5e..b6076d19e442 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -135,10 +135,12 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); +bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c new file mode 100644 index 000000000000..0723f800e815 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -0,0 +1,975 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <linux/mmu_context.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "navi10_enum.h" +#include "athub/athub_2_0_0_offset.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "oss/osssys_5_0_0_offset.h" +#include "oss/osssys_5_0_0_sh_mask.h" +#include "soc15_common.h" +#include "v10_structs.h" +#include "nv.h" +#include "nvd.h" + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES, + SAVE_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +#if 0 +static uint32_t get_watch_base_addr(struct amdgpu_device *adev); +#endif +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; +#if 0 +/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but + * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu + * changes commented out related code, doing the same here for now but + * need to sync with Ken et al + */ + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); +#endif + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .get_tile_config = amdgpu_amdkfd_get_tile_config, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions() +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping); + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. + */ + + pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + +#if 0 + /* TODO: uncomment this code when the hardware support is ready. */ + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + pr_debug("ATHUB mapping update finished\n"); + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); +#endif + + /* Mapping vmid to pasid also for IH block */ + pr_debug("update mapping for IH block and mmhub"); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + /* On gfx10, mmSDMA1_xxx registers are defined NOT based + * on SDMA1 base address (dw 0x1860) but based on SDMA0 + * base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL + * instead of mmSDMA1_RLC0_RB_CNTL for the base address calc + * below + */ + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +#if 0 +static uint32_t get_watch_base_addr(struct amdgpu_device *adev) +{ + uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) - + mmTCP_WATCH0_ADDR_H; + + pr_debug("kfd: reg watch base address: 0x%x\n", retval); + + return retval; +} +#endif + +static inline struct v10_compute_mqd *get_mqd(void *mqd) +{ + return (struct v10_compute_mqd *)mqd; +} + +static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v10_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_compute_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id); + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id); + pr_debug("sdma base addr %x\n", sdma_base_addr); + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v10_compute_mqd *m = get_mqd(mqd); + +#if 0 + unsigned long flags; + int retry; +#endif + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + +#if 0 /* Is this still needed? */ + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); +#endif + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v10_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */ + GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK | + GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK | + GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK | + GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK | + GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use light weight invalidation. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate only GCHUB as + * SDMA is now moved to GCHUB + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid)); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (amdgpu_emu_mode == 0 && ring->sched.ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SA_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = page_table_base | AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index df26bf34b675..1d3ee9c42f7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -504,7 +504,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; @@ -813,7 +813,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info, ret = amdgpu_sync_resv(NULL, sync, pd->tbo.resv, - AMDGPU_FENCE_OWNER_UNDEFINED, false); + AMDGPU_FENCE_OWNER_KFD, false); if (ret) return ret; } @@ -1729,38 +1729,25 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { - bo->tbo.ttm->pages[0] = NULL; - pr_info("%s: Failed to get user pages: %d\n", + pr_debug("%s: Failed to get user pages: %d\n", __func__, ret); - /* Pretend it succeeded. It will fail later - * with a VM fault if the GPU tries to access - * it. Better than hanging indefinitely with - * stalled user mode queues. - */ - } - } - return 0; -} - -/* Remove invalid userptr BOs from hmm track list - * - * Stop HMM track the userptr update - */ -static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) -{ - struct kgd_mem *mem, *tmp_mem; - struct amdgpu_bo *bo; + /* Return error -EBUSY or -ENOMEM, retry restore */ + return ret; + } - list_for_each_entry_safe(mem, tmp_mem, - &process_info->userptr_inval_list, - validate_list.head) { - bo = mem->bo; amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + + /* Mark the BO as valid unless it was invalidated + * again concurrently. + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; } + + return 0; } /* Validate invalid userptr BOs @@ -1842,13 +1829,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); - /* Stop HMM track the userptr update. We dont check the return - * value for concurrent CPU page table update because we will - * reschedule the restore worker if process_info->evicted_bos - * is updated. - */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1947,7 +1927,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) } unlock_out: - untrack_invalid_user_pages(process_info); mutex_unlock(&process_info->lock); mmput(mm); put_task_struct(usertask); @@ -2153,12 +2132,16 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. */ + ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1); + if (ret) + goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); amdgpu_bo_unreserve(gws_bo); mutex_unlock(&(*mem)->process_info->lock); return ret; +reserve_shared_fail: bo_validation_failure: amdgpu_bo_unreserve(gws_bo); bo_reservation_failure: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index a2dbdf13c4c7..daf687428cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -118,6 +118,7 @@ union umc_info { union vram_info { struct atom_vram_info_header_v2_3 v23; + struct atom_vram_info_header_v2_4 v24; }; /* * Return vram width from integrated system info table, if available, @@ -126,22 +127,50 @@ union vram_info { int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; - int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - integratedsysteminfo); + int index; u16 data_offset, size; union igp_info *igp_info; + union vram_info *vram_info; + u32 mem_channel_number; + u32 mem_channel_width; u8 frev, crev; + if (adev->flags & AMD_IS_APU) + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + else + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_info); + /* get any igp specific overrides */ if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { - igp_info = (union igp_info *) - (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - return igp_info->v11.umachannelnumber * 64; - default: - return 0; + if (adev->flags & AMD_IS_APU) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + mem_channel_number = igp_info->v11.umachannelnumber; + /* channel width is 64 */ + return mem_channel_number * 64; + default: + return 0; + } + } else { + vram_info = (union vram_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 3: + mem_channel_number = vram_info->v23.vram_module[0].channel_num; + mem_channel_width = vram_info->v23.vram_module[0].channel_width; + return mem_channel_number * (1 << mem_channel_width); + case 4: + mem_channel_number = vram_info->v24.vram_module[0].channel_num; + mem_channel_width = vram_info->v24.vram_module[0].channel_width; + return mem_channel_number * (1 << mem_channel_width); + default: + return 0; + } } } @@ -179,6 +208,9 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, case ATOM_DGPU_VRAM_TYPE_HBM2: vram_type = AMDGPU_VRAM_TYPE_HBM; break; + case ATOM_DGPU_VRAM_TYPE_GDDR6: + vram_type = AMDGPU_VRAM_TYPE_GDDR6; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; @@ -227,6 +259,9 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) case 3: mem_type = vram_info->v23.vram_module[0].memory_type; return convert_atom_mem_type_to_vram_type(adev, mem_type); + case 4: + mem_type = vram_info->v24.vram_module[0].memory_type; + return convert_atom_mem_type_to_vram_type(adev, mem_type); default: return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc63707e426f..e069de8b54e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -633,7 +633,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return -ENOMEM; } - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); if (r) { kvfree(e->user_pages); e->user_pages = NULL; @@ -650,7 +650,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates, true); + &duplicates, false); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); @@ -673,16 +673,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } r = amdgpu_cs_list_validate(p, &duplicates); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n"); + if (r) goto error_validate; - } r = amdgpu_cs_list_validate(p, &p->validated); - if (r) { - DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); + if (r) goto error_validate; - } amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); @@ -878,7 +874,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { struct dma_fence *f; bo_va = fpriv->csa_va; @@ -967,7 +963,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) { + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) ce_preempt++; @@ -1385,7 +1382,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); - else if (r != -ERESTARTSYS) + else if (r != -ERESTARTSYS && r != -EAGAIN) DRM_ERROR("Failed to process the buffer list %d!\n", r); goto out; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 06f83cac0d3a..35a8d3c96fc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -47,6 +47,7 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo return -ENOMEM; memset(ptr, 0, size); + adev->virt.csa_cpu_addr = ptr; return 0; } @@ -79,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, list_add(&csa_tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true); + r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false); if (r) { DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f255a00c4492..6d54decef7f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -106,10 +106,10 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, ssize_t result = 0; int r; bool pm_pg_lock, use_bank, use_ring; - unsigned instance_bank, sh_bank, se_bank, me, pipe, queue; + unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid; pm_pg_lock = use_bank = use_ring = false; - instance_bank = sh_bank = se_bank = me = pipe = queue = 0; + instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0; if (size & 0x3 || *pos & 0x3 || ((*pos & (1ULL << 62)) && (*pos & (1ULL << 61)))) @@ -135,6 +135,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, me = (*pos & GENMASK_ULL(33, 24)) >> 24; pipe = (*pos & GENMASK_ULL(43, 34)) >> 34; queue = (*pos & GENMASK_ULL(53, 44)) >> 44; + vmid = (*pos & GENMASK_ULL(58, 54)) >> 54; use_ring = 1; } else { @@ -152,7 +153,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, sh_bank, instance_bank); } else if (use_ring) { mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue); + amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid); } if (pm_pg_lock) @@ -185,7 +186,7 @@ end: amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } else if (use_ring) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0); + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -923,17 +924,195 @@ static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, }; +static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, + struct dma_fence **fences) +{ + struct amdgpu_fence_driver *drv = &ring->fence_drv; + uint32_t sync_seq, last_seq; + + last_seq = atomic_read(&ring->fence_drv.last_seq); + sync_seq = ring->fence_drv.sync_seq; + + last_seq &= drv->num_fences_mask; + sync_seq &= drv->num_fences_mask; + + do { + struct dma_fence *fence, **ptr; + + ++last_seq; + last_seq &= drv->num_fences_mask; + ptr = &drv->fences[last_seq]; + + fence = rcu_dereference_protected(*ptr, 1); + RCU_INIT_POINTER(*ptr, NULL); + + if (!fence) + continue; + + fences[last_seq] = fence; + + } while (last_seq != sync_seq); +} + +static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences, + int length) +{ + int i; + struct dma_fence *fence; + + for (i = 0; i < length; i++) { + fence = fences[i]; + if (!fence) + continue; + dma_fence_signal(fence); + dma_fence_put(fence); + } +} + +static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *s_job; + struct dma_fence *fence; + + spin_lock(&sched->job_list_lock); + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + fence = sched->ops->run_job(s_job); + dma_fence_put(fence); + } + spin_unlock(&sched->job_list_lock); +} + +static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) +{ + struct amdgpu_job *job; + struct drm_sched_job *s_job; + uint32_t preempt_seq; + struct dma_fence *fence, **ptr; + struct amdgpu_fence_driver *drv = &ring->fence_drv; + struct drm_gpu_scheduler *sched = &ring->sched; + + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) + return; + + preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2)); + if (preempt_seq <= atomic_read(&drv->last_seq)) + return; + + preempt_seq &= drv->num_fences_mask; + ptr = &drv->fences[preempt_seq]; + fence = rcu_dereference_protected(*ptr, 1); + + spin_lock(&sched->job_list_lock); + list_for_each_entry(s_job, &sched->ring_mirror_list, node) { + job = to_amdgpu_job(s_job); + if (job->fence == fence) + /* mark the job as preempted */ + job->preemption_status |= AMDGPU_IB_PREEMPTED; + } + spin_unlock(&sched->job_list_lock); +} + +static int amdgpu_debugfs_ib_preempt(void *data, u64 val) +{ + int r, resched, length; + struct amdgpu_ring *ring; + struct dma_fence **fences = NULL; + struct amdgpu_device *adev = (struct amdgpu_device *)data; + + if (val >= AMDGPU_MAX_RINGS) + return -EINVAL; + + ring = adev->rings[val]; + + if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) + return -EINVAL; + + /* the last preemption failed */ + if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr)) + return -EBUSY; + + length = ring->fence_drv.num_fences_mask + 1; + fences = kcalloc(length, sizeof(void *), GFP_KERNEL); + if (!fences) + return -ENOMEM; + + /* stop the scheduler */ + kthread_park(ring->sched.thread); + + resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + + /* preempt the IB */ + r = amdgpu_ring_preempt_ib(ring); + if (r) { + DRM_WARN("failed to preempt ring %d\n", ring->idx); + goto failure; + } + + amdgpu_fence_process(ring); + + if (atomic_read(&ring->fence_drv.last_seq) != + ring->fence_drv.sync_seq) { + DRM_INFO("ring %d was preempted\n", ring->idx); + + amdgpu_ib_preempt_mark_partial_job(ring); + + /* swap out the old fences */ + amdgpu_ib_preempt_fences_swap(ring, fences); + + amdgpu_fence_driver_force_completion(ring); + + /* resubmit unfinished jobs */ + amdgpu_ib_preempt_job_recovery(&ring->sched); + + /* wait for jobs finished */ + amdgpu_fence_wait_empty(ring); + + /* signal the old fences */ + amdgpu_ib_preempt_signal_fences(fences, length); + } + +failure: + /* restart the scheduler */ + kthread_unpark(ring->sched.thread); + + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + + if (fences) + kfree(fences); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL, + amdgpu_debugfs_ib_preempt, "%llu\n"); + int amdgpu_debugfs_init(struct amdgpu_device *adev) { + adev->debugfs_preempt = + debugfs_create_file("amdgpu_preempt_ib", 0600, + adev->ddev->primary->debugfs_root, + (void *)adev, &fops_ib_preempt); + if (!(adev->debugfs_preempt)) { + DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n"); + return -EIO; + } + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, ARRAY_SIZE(amdgpu_debugfs_list)); } +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) +{ + if (adev->debugfs_preempt) + debugfs_remove(adev->debugfs_preempt); +} + #else int amdgpu_debugfs_init(struct amdgpu_device *adev) { return 0; } +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { } int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 8260d8073c26..f289d28ad6b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -34,6 +34,7 @@ struct amdgpu_debugfs { int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); int amdgpu_debugfs_init(struct amdgpu_device *adev); +void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev); int amdgpu_debugfs_add_files(struct amdgpu_device *adev, const struct drm_info_list *files, unsigned nfiles); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a65c0661253a..5a7f893cf724 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -52,6 +52,7 @@ #endif #include "vi.h" #include "soc15.h" +#include "nv.h" #include "bif/bif_4_1_d.h" #include <linux/pci.h> #include <linux/firmware.h> @@ -62,12 +63,14 @@ #include "amdgpu_xgmi.h" #include "amdgpu_ras.h" +#include "amdgpu_pmu.h" MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -95,6 +98,7 @@ static const char *amdgpu_asic_name[] = { "VEGA12", "VEGA20", "RAVEN", + "NAVI10", "LAST", }; @@ -507,7 +511,10 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, } else { tmp = RREG32(reg); tmp &= ~and_mask; - tmp |= or_mask; + if (adev->family >= AMDGPU_FAMILY_AI) + tmp |= (or_mask & and_mask); + else + tmp |= or_mask; } WREG32(reg, tmp); } @@ -974,13 +981,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_device_check_block_size(adev); - if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || - !is_power_of_2(amdgpu_vram_page_split))) { - dev_warn(adev->dev, "invalid VRAM page split (%d)\n", - amdgpu_vram_page_split); - amdgpu_vram_page_split = 1024; - } - ret = amdgpu_device_get_job_timeout_settings(adev); if (ret) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); @@ -1384,6 +1384,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) else chip_name = "raven"; break; + case CHIP_NAVI10: + chip_name = "navi10"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1430,6 +1433,23 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); + if (hdr->version_minor >= 1) { + const struct gpu_info_firmware_v1_1 *gpu_info_fw = + (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + adev->gfx.config.num_sc_per_sh = + le32_to_cpu(gpu_info_fw->num_sc_per_sh); + adev->gfx.config.num_packer_per_sc = + le32_to_cpu(gpu_info_fw->num_packer_per_sc); + } +#ifdef CONFIG_DRM_AMD_DC_DCN2_0 + if (hdr->version_minor == 2) { + const struct gpu_info_firmware_v1_2 *gpu_info_fw = + (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; + } +#endif break; } default: @@ -1518,6 +1538,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; break; + case CHIP_NAVI10: + adev->family = AMDGPU_FAMILY_NV; + + r = nv_set_ip_blocks(adev); + if (r) + return r; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1542,17 +1569,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; - /* Read BIOS */ - if (!amdgpu_get_bios(adev)) - return -EINVAL; - - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; - } - for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1574,6 +1590,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.valid = true; } } + /* get the vbios after the asic_funcs are set up */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { + /* Read BIOS */ + if (!amdgpu_get_bios(adev)) + return -EINVAL; + + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } + } } adev->cg_flags &= amdgpu_cg_mask; @@ -1713,7 +1742,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; /* right after GMC hw init, we create CSA */ - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_CSA_SIZE); @@ -2395,6 +2424,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: #endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_0) + case CHIP_NAVI10: +#endif return amdgpu_dc != 0; #endif default: @@ -2505,6 +2537,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); mutex_init(&adev->virt.dpm_mutex); + mutex_init(&adev->psp.mutex); r = amdgpu_device_check_arguments(adev); if (r) @@ -2564,8 +2597,33 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->rio_mem == NULL) DRM_INFO("PCI I/O BAR is not found.\n"); + /* enable PCIE atomic ops */ + r = pci_enable_atomic_ops_to_root(adev->pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (r) { + adev->have_atomics_support = false; + DRM_INFO("PCIE atomic ops is not supported\n"); + } else { + adev->have_atomics_support = true; + } + amdgpu_device_get_pcie_info(adev); + if (amdgpu_mcbp) + DRM_INFO("MCBP is enabled\n"); + + if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) + adev->enable_mes = true; + + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) { + r = amdgpu_discovery_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_discovery_init failed\n"); + return r; + } + } + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) @@ -2690,6 +2748,9 @@ fence_driver_init: amdgpu_fbdev_init(adev); + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) + amdgpu_pm_virt_sysfs_init(adev); + r = amdgpu_pm_sysfs_init(adev); if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); @@ -2749,6 +2810,11 @@ fence_driver_init: return r; } + if (IS_ENABLED(CONFIG_PERF_EVENTS)) + r = amdgpu_pmu_init(adev); + if (r) + dev_err(adev->dev, "amdgpu_pmu_init failed\n"); + return 0; failed: @@ -2811,9 +2877,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev)) + amdgpu_pm_virt_sysfs_fini(adev); + amdgpu_debugfs_regs_cleanup(adev); device_remove_file(adev->dev, &dev_attr_pcie_replay_count); amdgpu_ucode_sysfs_fini(adev); + if (IS_ENABLED(CONFIG_PERF_EVENTS)) + amdgpu_pmu_fini(adev); + amdgpu_debugfs_preempt_cleanup(adev); + if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) + amdgpu_discovery_fini(adev); } @@ -3499,6 +3573,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); + /* + * Add this ASIC as tracked as reset was already + * complete successfully. + */ + amdgpu_register_gpu_instance(tmp_adev); + r = amdgpu_device_ip_late_init(tmp_adev); if (r) goto out; @@ -3633,8 +3713,19 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + /* + * Mark these ASICs to be reseted as untracked first + * And add them back after reset completed + */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) + amdgpu_unregister_gpu_instance(tmp_adev); + /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /* disable ras on ALL IPs */ + if (amdgpu_device_ip_need_full_reset(tmp_adev)) + amdgpu_ras_suspend(tmp_adev); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c new file mode 100644 index 000000000000..1481899f86c1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -0,0 +1,415 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_discovery.h" +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "nbio/nbio_2_3_offset.h" +#include "discovery.h" + +#define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMM_INDEX 0x0 +#define mmMM_INDEX_HI 0x6 +#define mmMM_DATA 0x1 +#define HW_ID_MAX 300 + +const char *hw_id_names[HW_ID_MAX] = { + [MP1_HWID] = "MP1", + [MP2_HWID] = "MP2", + [THM_HWID] = "THM", + [SMUIO_HWID] = "SMUIO", + [FUSE_HWID] = "FUSE", + [CLKA_HWID] = "CLKA", + [PWR_HWID] = "PWR", + [GC_HWID] = "GC", + [UVD_HWID] = "UVD", + [AUDIO_AZ_HWID] = "AUDIO_AZ", + [ACP_HWID] = "ACP", + [DCI_HWID] = "DCI", + [DMU_HWID] = "DMU", + [DCO_HWID] = "DCO", + [DIO_HWID] = "DIO", + [XDMA_HWID] = "XDMA", + [DCEAZ_HWID] = "DCEAZ", + [DAZ_HWID] = "DAZ", + [SDPMUX_HWID] = "SDPMUX", + [NTB_HWID] = "NTB", + [IOHC_HWID] = "IOHC", + [L2IMU_HWID] = "L2IMU", + [VCE_HWID] = "VCE", + [MMHUB_HWID] = "MMHUB", + [ATHUB_HWID] = "ATHUB", + [DBGU_NBIO_HWID] = "DBGU_NBIO", + [DFX_HWID] = "DFX", + [DBGU0_HWID] = "DBGU0", + [DBGU1_HWID] = "DBGU1", + [OSSSYS_HWID] = "OSSSYS", + [HDP_HWID] = "HDP", + [SDMA0_HWID] = "SDMA0", + [SDMA1_HWID] = "SDMA1", + [ISP_HWID] = "ISP", + [DBGU_IO_HWID] = "DBGU_IO", + [DF_HWID] = "DF", + [CLKB_HWID] = "CLKB", + [FCH_HWID] = "FCH", + [DFX_DAP_HWID] = "DFX_DAP", + [L1IMU_PCIE_HWID] = "L1IMU_PCIE", + [L1IMU_NBIF_HWID] = "L1IMU_NBIF", + [L1IMU_IOAGR_HWID] = "L1IMU_IOAGR", + [L1IMU3_HWID] = "L1IMU3", + [L1IMU4_HWID] = "L1IMU4", + [L1IMU5_HWID] = "L1IMU5", + [L1IMU6_HWID] = "L1IMU6", + [L1IMU7_HWID] = "L1IMU7", + [L1IMU8_HWID] = "L1IMU8", + [L1IMU9_HWID] = "L1IMU9", + [L1IMU10_HWID] = "L1IMU10", + [L1IMU11_HWID] = "L1IMU11", + [L1IMU12_HWID] = "L1IMU12", + [L1IMU13_HWID] = "L1IMU13", + [L1IMU14_HWID] = "L1IMU14", + [L1IMU15_HWID] = "L1IMU15", + [WAFLC_HWID] = "WAFLC", + [FCH_USB_PD_HWID] = "FCH_USB_PD", + [PCIE_HWID] = "PCIE", + [PCS_HWID] = "PCS", + [DDCL_HWID] = "DDCL", + [SST_HWID] = "SST", + [IOAGR_HWID] = "IOAGR", + [NBIF_HWID] = "NBIF", + [IOAPIC_HWID] = "IOAPIC", + [SYSTEMHUB_HWID] = "SYSTEMHUB", + [NTBCCP_HWID] = "NTBCCP", + [UMC_HWID] = "UMC", + [SATA_HWID] = "SATA", + [USB_HWID] = "USB", + [CCXSEC_HWID] = "CCXSEC", + [XGMI_HWID] = "XGMI", + [XGBE_HWID] = "XGBE", + [MP0_HWID] = "MP0", +}; + +static int hw_id_map[MAX_HWIP] = { + [GC_HWIP] = GC_HWID, + [HDP_HWIP] = HDP_HWID, + [SDMA0_HWIP] = SDMA0_HWID, + [SDMA1_HWIP] = SDMA1_HWID, + [MMHUB_HWIP] = MMHUB_HWID, + [ATHUB_HWIP] = ATHUB_HWID, + [NBIO_HWIP] = NBIF_HWID, + [MP0_HWIP] = MP0_HWID, + [MP1_HWIP] = MP1_HWID, + [UVD_HWIP] = UVD_HWID, + [VCE_HWIP] = VCE_HWID, + [DF_HWIP] = DF_HWID, + [DCE_HWIP] = DMU_HWID, + [OSSSYS_HWIP] = OSSSYS_HWID, + [SMUIO_HWIP] = SMUIO_HWID, + [PWR_HWIP] = PWR_HWID, + [NBIF_HWIP] = NBIF_HWID, + [THM_HWIP] = THM_HWID, + [CLK_HWIP] = CLKA_HWID, +}; + +static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) +{ + uint32_t *p = (uint32_t *)binary; + uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; + uint64_t pos = vram_size - BINARY_MAX_SIZE; + unsigned long flags; + + while (pos < vram_size) { + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000); + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31); + *p++ = RREG32_NO_KIQ(mmMM_DATA); + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + pos += 4; + } + + return 0; +} + +static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size) +{ + uint16_t checksum = 0; + int i; + + for (i = 0; i < size; i++) + checksum += data[i]; + + return checksum; +} + +static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size, + uint16_t expected) +{ + return !!(amdgpu_discovery_calculate_checksum(data, size) == expected); +} + +int amdgpu_discovery_init(struct amdgpu_device *adev) +{ + struct table_info *info; + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct gpu_info_header *ghdr; + uint16_t offset; + uint16_t size; + uint16_t checksum; + int r; + + adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL); + if (!adev->discovery) + return -ENOMEM; + + r = amdgpu_discovery_read_binary(adev, adev->discovery); + if (r) { + DRM_ERROR("failed to read ip discovery binary\n"); + goto out; + } + + bhdr = (struct binary_header *)adev->discovery; + + if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) { + DRM_ERROR("invalid ip discovery binary signature\n"); + r = -EINVAL; + goto out; + } + + offset = offsetof(struct binary_header, binary_checksum) + + sizeof(bhdr->binary_checksum); + size = bhdr->binary_size - offset; + checksum = bhdr->binary_checksum; + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + size, checksum)) { + DRM_ERROR("invalid ip discovery binary checksum\n"); + r = -EINVAL; + goto out; + } + + info = &bhdr->table_list[IP_DISCOVERY]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + ihdr = (struct ip_discovery_header *)(adev->discovery + offset); + + if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { + DRM_ERROR("invalid ip discovery data table signature\n"); + r = -EINVAL; + goto out; + } + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + ihdr->size, checksum)) { + DRM_ERROR("invalid ip discovery data table checksum\n"); + r = -EINVAL; + goto out; + } + + info = &bhdr->table_list[GC]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + ghdr = (struct gpu_info_header *)(adev->discovery + offset); + + if (!amdgpu_discovery_verify_checksum(adev->discovery + offset, + ghdr->size, checksum)) { + DRM_ERROR("invalid gc data table checksum\n"); + r = -EINVAL; + goto out; + } + + return 0; + +out: + kfree(adev->discovery); + adev->discovery = NULL; + + return r; +} + +void amdgpu_discovery_fini(struct amdgpu_device *adev) +{ + kfree(adev->discovery); + adev->discovery = NULL; +} + +int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct ip *ip; + uint16_t die_offset; + uint16_t ip_offset; + uint16_t num_dies; + uint16_t num_ips; + uint8_t num_base_address; + int hw_ip; + int i, j, k; + + if (!adev->discovery) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + ihdr = (struct ip_discovery_header *)(adev->discovery + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + DRM_DEBUG("number of dies: %d\n", num_dies); + + for (i = 0; i < num_dies; i++) { + die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); + dhdr = (struct die_header *)(adev->discovery + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + if (le16_to_cpu(dhdr->die_id) != i) { + DRM_ERROR("invalid die id %d, expected %d\n", + le16_to_cpu(dhdr->die_id), i); + return -EINVAL; + } + + DRM_DEBUG("number of hardware IPs on die%d: %d\n", + le16_to_cpu(dhdr->die_id), num_ips); + + for (j = 0; j < num_ips; j++) { + ip = (struct ip *)(adev->discovery + ip_offset); + num_base_address = ip->num_base_address; + + DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n", + hw_id_names[le16_to_cpu(ip->hw_id)], + le16_to_cpu(ip->hw_id), + ip->number_instance, + ip->major, ip->minor, + ip->revision); + + for (k = 0; k < num_base_address; k++) { + /* + * convert the endianness of base addresses in place, + * so that we don't need to convert them when accessing adev->reg_offset. + */ + ip->base_address[k] = le32_to_cpu(ip->base_address[k]); + DRM_DEBUG("\t0x%08x\n", ip->base_address[k]); + } + + for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) { + if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) { + DRM_INFO("set register base offset for %s\n", + hw_id_names[le16_to_cpu(ip->hw_id)]); + adev->reg_offset[hw_ip][ip->number_instance] = + ip->base_address; + } + + } + + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + } + } + + return 0; +} + +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, + int *major, int *minor) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct ip *ip; + uint16_t die_offset; + uint16_t ip_offset; + uint16_t num_dies; + uint16_t num_ips; + int i, j; + + if (!adev->discovery) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + ihdr = (struct ip_discovery_header *)(adev->discovery + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + for (i = 0; i < num_dies; i++) { + die_offset = le16_to_cpu(ihdr->die_info[i].die_offset); + dhdr = (struct die_header *)(adev->discovery + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + for (j = 0; j < num_ips; j++) { + ip = (struct ip *)(adev->discovery + ip_offset); + + if (le16_to_cpu(ip->hw_id) == hw_id) { + if (major) + *major = ip->major; + if (minor) + *minor = ip->minor; + return 0; + } + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + } + } + + return -EINVAL; +} + +int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct gc_info_v1_0 *gc_info; + + if (!adev->discovery) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->discovery; + gc_info = (struct gc_info_v1_0 *)(adev->discovery + + le16_to_cpu(bhdr->table_list[GC].offset)); + + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / + le32_to_cpu(gc_info->gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h new file mode 100644 index 000000000000..85b8c4d4d576 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -0,0 +1,34 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DISCOVERY__ +#define __AMDGPU_DISCOVERY__ + +int amdgpu_discovery_init(struct amdgpu_device *adev); +void amdgpu_discovery_fini(struct amdgpu_device *adev); +int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); +int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, + int *major, int *minor); +int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); + +#endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 68959b923f89..790263dcc064 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -51,6 +51,7 @@ struct amdgpu_doorbell_index { uint32_t userqueue_start; uint32_t userqueue_end; uint32_t gfx_ring0; + uint32_t gfx_ring1; uint32_t sdma_engine[8]; uint32_t ih; union { @@ -153,6 +154,45 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; +typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT +{ + /* Compute + GFX: 0~255 */ + AMDGPU_NAVI10_DOORBELL_KIQ = 0x000, + AMDGPU_NAVI10_DOORBELL_HIQ = 0x001, + AMDGPU_NAVI10_DOORBELL_DIQ = 0x002, + AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003, + AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004, + AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005, + AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006, + AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007, + AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008, + AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009, + AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A, + AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B, + AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A, + AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B, + AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C, + /* SDMA:256~335*/ + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100, + AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A, + /* IH: 376~391 */ + AMDGPU_NAVI10_DOORBELL_IH = 0x178, + /* MMSCH: 392~407 + * overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189, + AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A, + AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B, + + AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0, + AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7, + + AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F, + AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF +} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT; + /* * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index eedecaf4c804..61bd10310604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -906,16 +906,63 @@ amdgpu_get_vce_clock_state(void *handle, u32 idx) int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low) { - if (is_support_sw_smu(adev)) - return smu_get_sclk(&adev->smu, low); - else + uint32_t clk_freq; + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK, + low ? &clk_freq : NULL, + !low ? &clk_freq : NULL); + if (ret) + return 0; + return clk_freq * 100; + + } else { return (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (low)); + } } int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) { - if (is_support_sw_smu(adev)) - return smu_get_mclk(&adev->smu, low); - else + uint32_t clk_freq; + int ret = 0; + if (is_support_sw_smu(adev)) { + ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK, + low ? &clk_freq : NULL, + !low ? &clk_freq : NULL); + if (ret) + return 0; + return clk_freq * 100; + + } else { return (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (low)); + } +} + +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate) +{ + int ret = 0; + bool swsmu = is_support_sw_smu(adev); + + switch (block_type) { + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_VCE: + if (swsmu) + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); + else + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; + case AMD_IP_BLOCK_TYPE_GMC: + case AMD_IP_BLOCK_TYPE_ACP: + case AMD_IP_BLOCK_TYPE_SDMA: + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate)); + break; + default: + break; + } + + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 521dbd0d9af8..1c5c0fd76dbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -355,10 +355,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) -#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \ - ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\ - (adev)->powerplay.pp_handle, block_type, gate)) - #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ (adev)->powerplay.pp_handle, buf)) @@ -520,6 +516,9 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, struct amd_vce_state* amdgpu_get_vce_clock_state(void *handle, u32 idx); +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, + uint32_t block_type, bool gate); + extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8e1b269351e8..f67b5baed441 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -78,9 +78,10 @@ * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. * - 3.31.0 - Add support for per-flip tiling attribute changes with DC * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. + * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 32 +#define KMS_DRIVER_MINOR 33 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 @@ -110,7 +111,6 @@ int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; -int amdgpu_vram_page_split = 512; int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_dc = -1; @@ -138,6 +138,11 @@ int amdgpu_emu_mode = 0; uint amdgpu_smu_memory_pool_size = 0; /* FBC (bit 0) disabled by default*/ uint amdgpu_dc_feature_mask = 0; +int amdgpu_async_gfx_ring = 1; +int amdgpu_mcbp = 0; +int amdgpu_discovery = -1; +int amdgpu_mes = 0; +int amdgpu_noretry; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -243,13 +248,16 @@ module_param_named(msi, amdgpu_msi, int, 0444); * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) * jobs is 10000. And there is no timeout enforced on compute jobs. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), " +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs." + " 0: keep default value. negative: infinity timeout), " "format is [Non-Compute] or [GFX,Compute,SDMA,Video]"); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** * DOC: dpm (int) - * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto). + * Override for dynamic power management setting + * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20) + * The default is -1 (auto). */ MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(dpm, amdgpu_dpm, int, 0444); @@ -345,13 +353,6 @@ MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); /** - * DOC: vram_page_split (int) - * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512. - */ -MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); -module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); - -/** * DOC: exp_hw_support (int) * Enable experimental hw support (1 = enable). The default is 0 (disabled). */ @@ -574,6 +575,44 @@ MODULE_PARM_DESC(smu_memory_pool_size, "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); +/** + * DOC: async_gfx_ring (int) + * It is used to enable gfx rings that could be configured with different prioritites or equal priorities + */ +MODULE_PARM_DESC(async_gfx_ring, + "Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))"); +module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444); + +/** + * DOC: mcbp (int) + * It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mcbp, + "Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)"); +module_param_named(mcbp, amdgpu_mcbp, int, 0444); + +/** + * DOC: discovery (int) + * Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM. + * (-1 = auto (default), 0 = disabled, 1 = enabled) + */ +MODULE_PARM_DESC(discovery, + "Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM"); +module_param_named(discovery, amdgpu_discovery, int, 0444); + +/** + * DOC: mes (int) + * Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes, + "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)"); +module_param_named(mes, amdgpu_mes, int, 0444); + +MODULE_PARM_DESC(noretry, + "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); +module_param_named(noretry, amdgpu_noretry, int, 0644); + #ifdef CONFIG_HSA_AMD /** * DOC: sched_policy (int) @@ -650,17 +689,6 @@ MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); /** - * DOC: noretry (int) - * This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry. - * Setting 1 disables retry. - * Retry is needed for recoverable page faults. - */ -int noretry; -module_param(noretry, int, 0644); -MODULE_PARM_DESC(noretry, - "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); - -/** * DOC: halt_if_hws_hang (int) * Halt if HWS hang is detected. Default value, 0, disables the halt on hang. * Setting 1 enables halt on hang. @@ -678,6 +706,14 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau bool hws_gws_support; module_param(hws_gws_support, bool, 0444); MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); + +/** + * DOC: queue_preemption_timeout_ms (int) + * queue preemption timeout in ms (1 = Minimum, 9000 = default) + */ +int queue_preemption_timeout_ms = 9000; +module_param(queue_preemption_timeout_ms, int, 0644); +MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)"); #endif /** @@ -688,6 +724,22 @@ MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supp MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))"); module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); +/** + * DOC: abmlevel (uint) + * Override the default ABM (Adaptive Backlight Management) level used for DC + * enabled hardware. Requires DMCU to be supported and loaded. + * Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by + * default. Values 1-4 control the maximum allowable brightness reduction via + * the ABM algorithm, with 1 being the least reduction and 4 being the most + * reduction. + * + * Defaults to 0, or disabled. Userspace can still override this level later + * after boot. + */ +uint amdgpu_dm_abm_level = 0; +MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); +module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -944,6 +996,14 @@ static const struct pci_device_id pciidlist[] = { /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, + /* Navi10 */ + {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0, 0, 0} }; @@ -1251,7 +1311,8 @@ int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) * By default timeout for non compute jobs is 10000. * And there is no timeout enforced on compute jobs. */ - adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000; + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { @@ -1261,10 +1322,13 @@ int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) if (ret) return ret; - /* Invalidate 0 and negative values */ - if (timeout <= 0) { + if (timeout == 0) { index++; continue; + } else if (timeout < 0) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else { + timeout = msecs_to_jiffies(timeout); } switch (index++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index df49fa4bbf61..23085b352cf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -709,22 +709,30 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) amdgpu_fence_process(ring); seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); - seq_printf(m, "Last signaled fence 0x%08x\n", + seq_printf(m, "Last signaled fence 0x%08x\n", atomic_read(&ring->fence_drv.last_seq)); - seq_printf(m, "Last emitted 0x%08x\n", + seq_printf(m, "Last emitted 0x%08x\n", ring->fence_drv.sync_seq); + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX || + ring->funcs->type == AMDGPU_RING_TYPE_SDMA) { + seq_printf(m, "Last signaled trailing fence 0x%08x\n", + le32_to_cpu(*ring->trail_fence_cpu_addr)); + seq_printf(m, "Last emitted 0x%08x\n", + ring->trail_seq); + } + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) continue; /* set in CP_VMID_PREEMPT and preemption occurred */ - seq_printf(m, "Last preempted 0x%08x\n", + seq_printf(m, "Last preempted 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 2))); /* set in CP_VMID_RESET and reset occurred */ - seq_printf(m, "Last reset 0x%08x\n", + seq_printf(m, "Last reset 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 4))); /* Both preemption and reset occurred */ - seq_printf(m, "Last both 0x%08x\n", + seq_printf(m, "Last both 0x%08x\n", le32_to_cpu(*(ring->fence_drv.cpu_addr + 6))); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index dad2186f4ed5..df8a23554831 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -31,7 +31,8 @@ struct amdgpu_gds { uint32_t gds_size; uint32_t gws_size; uint32_t oa_size; - uint32_t gds_compute_max_wave_id; + uint32_t gds_compute_max_wave_id; + uint32_t vgt_gs_max_wave_id; }; struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 37b526c6f494..939f8305511b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -175,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, true); + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false); if (r) { dev_err(adev->dev, "leaking bo va because " "we fail to reserve bo (%d)\n", r); @@ -327,8 +327,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, } if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (r) goto release_object; @@ -612,7 +611,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, true); + r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false); if (r) goto error_unref; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index f198185c1fb6..74066e1466f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -34,8 +34,8 @@ * GPU GFX IP block helpers function. */ -int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, - int pipe, int queue) +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue) { int bit = 0; @@ -47,8 +47,8 @@ int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, return bit; } -void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, - int *mec, int *pipe, int *queue) +void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) { *queue = bit % adev->gfx.mec.num_queue_per_pipe; *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) @@ -61,10 +61,40 @@ void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue) { - return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), adev->gfx.mec.queue_bitmap); } +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + int bit = 0; + + bit += me * adev->gfx.me.num_pipe_per_me + * adev->gfx.me.num_queue_per_pipe; + bit += pipe * adev->gfx.me.num_queue_per_pipe; + bit += queue; + + return bit; +} + +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, + int *me, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.me.num_queue_per_pipe; + *pipe = (bit / adev->gfx.me.num_queue_per_pipe) + % adev->gfx.me.num_pipe_per_me; + *me = (bit / adev->gfx.me.num_queue_per_pipe) + / adev->gfx.me.num_pipe_per_me; +} + +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, + int me, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), + adev->gfx.me.queue_bitmap); +} + /** * amdgpu_gfx_scratch_get - Allocate a scratch register * @@ -199,6 +229,30 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; } +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, pipe, me; + + for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { + queue = i % adev->gfx.me.num_queue_per_pipe; + pipe = (i / adev->gfx.me.num_queue_per_pipe) + % adev->gfx.me.num_pipe_per_me; + me = (i / adev->gfx.me.num_queue_per_pipe) + / adev->gfx.me.num_pipe_per_me; + + if (me >= adev->gfx.me.num_me) + break; + /* policy: amdgpu owns the first queue per pipe at this stage + * will extend to mulitple queues per pipe later */ + if (me == 0 && queue < 1) + set_bit(i, adev->gfx.me.queue_bitmap); + } + + /* update the number of active graphics rings */ + adev->gfx.num_gfx_rings = + bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); +} + static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -213,7 +267,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; - amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); /* * 1. Using pipes 2/3 from MEC 2 seems cause problems. @@ -306,9 +360,9 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, return 0; } -/* create MQD for each compute queue */ -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size) +/* create MQD for each compute/gfx queue */ +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) { struct amdgpu_ring *ring = NULL; int r, i; @@ -335,6 +389,27 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } + if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + /* create MQD for each KGQ */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.me.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + } + /* create MQD for each KCQ */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -343,7 +418,7 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); return r; } @@ -357,11 +432,21 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, return 0; } -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; int i; + if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + kfree(adev->gfx.me.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; kfree(adev->gfx.mec.mqd_backup[i]); @@ -371,12 +456,81 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; + if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) + kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, &ring->mqd_ptr); } +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_compute_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], + RESET_QUEUES, 0, 0); + + return amdgpu_ring_test_ring(kiq_ring); +} + +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint64_t queue_mask = 0; + int r, i; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) + return -EINVAL; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, + kiq_ring->queue); + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_compute_rings + + kiq->pmf->set_resources_size); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]); + + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ enable failed\n"); + + return r; +} + /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable * * @adev: amdgpu_device pointer @@ -393,7 +547,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return; - if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu) + if (!is_support_sw_smu(adev) && + (!adev->powerplay.pp_funcs || + !adev->powerplay.pp_funcs->set_powergating_by_smu)) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 09fc53af3d35..1199b5828b90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -38,6 +38,7 @@ #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L +#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES struct amdgpu_mec { @@ -54,12 +55,41 @@ struct amdgpu_mec { DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; +enum amdgpu_unmap_queues_action { + PREEMPT_QUEUES = 0, + RESET_QUEUES, + DISABLE_PROCESS_QUEUES, + PREEMPT_QUEUES_NO_UNMAP, +}; + +struct kiq_pm4_funcs { + /* Support ASIC-specific kiq pm4 packets*/ + void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring, + uint64_t queue_mask); + void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring); + void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq); + void (*kiq_query_status)(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq); + /* Packet sizes */ + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; +}; + struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; spinlock_t ring_lock; struct amdgpu_ring ring; struct amdgpu_irq_src irq; + const struct kiq_pm4_funcs *pmf; }; /* @@ -131,6 +161,10 @@ struct amdgpu_gfx_config { uint32_t double_offchip_lds_buf; /* cached value of DB_DEBUG2 */ uint32_t db_debug2; + /* gfx10 specific config */ + uint32_t num_sc_per_sh; + uint32_t num_packer_per_sc; + uint32_t pa_sc_tile_steering_override; }; struct amdgpu_cu_info { @@ -161,7 +195,7 @@ struct amdgpu_gfx_funcs { uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, - u32 queue); + u32 queue, u32 vmid); }; struct amdgpu_ngg_buf { @@ -191,10 +225,38 @@ struct sq_work { unsigned ih_data; }; +struct amdgpu_pfp { + struct amdgpu_bo *pfp_fw_obj; + uint64_t pfp_fw_gpu_addr; + uint32_t *pfp_fw_ptr; +}; + +struct amdgpu_ce { + struct amdgpu_bo *ce_fw_obj; + uint64_t ce_fw_gpu_addr; + uint32_t *ce_fw_ptr; +}; + +struct amdgpu_me { + struct amdgpu_bo *me_fw_obj; + uint64_t me_fw_gpu_addr; + uint32_t *me_fw_ptr; + uint32_t num_me; + uint32_t num_pipe_per_me; + uint32_t num_queue_per_pipe; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + + /* These are the resources for which amdgpu takes ownership */ + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gfx_config config; struct amdgpu_rlc rlc; + struct amdgpu_pfp pfp; + struct amdgpu_ce ce; + struct amdgpu_me me; struct amdgpu_mec mec; struct amdgpu_kiq kiq; struct amdgpu_scratch scratch; @@ -265,7 +327,7 @@ struct amdgpu_gfx { #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q)) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -297,17 +359,27 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, unsigned hpd_size); -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size); -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); +int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev); +int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev); +int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev); void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); -int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, - int pipe, int queue); -void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, - int *mec, int *pipe, int *queue); +void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev); + +int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue); +void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue); bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); +int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, + int pipe, int queue); +void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, + int *me, int *pipe, int *queue); +bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, + int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c124e583bb91..7850084a05e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; + status |= job->preemption_status; amdgpu_ring_emit_cntxcntl(ring, status); } @@ -219,9 +220,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* drop preamble IBs if we don't have a context switch */ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - skip_preamble && - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ + skip_preamble && + !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && + !amdgpu_mcbp && + !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; amdgpu_ring_emit_ib(ring, job, ib, status); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index df6d33381f8b..57b3d8a9bef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -364,8 +364,11 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (updates && (!flushed || dma_fence_is_later(updates, flushed))) needs_flush = true; - /* Concurrent flushes are only possible starting with Vega10 */ - if (adev->asic_type < CHIP_VEGA10 && needs_flush) + /* Concurrent flushes are only possible starting with Vega10 and + * are broken on Navi10 and Navi14. + */ + if (needs_flush && (adev->asic_type < CHIP_VEGA10 || + adev->asic_type == CHIP_NAVI10)) continue; /* Good, we can use this VMID. Remember this submission as diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index e1b46a6703de..51e62504c279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -29,6 +29,8 @@ #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means context switch occured */ #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) +/* bit set means IB is preempted */ +#define AMDGPU_IB_PREEMPTED (1 << 3) #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) @@ -45,6 +47,7 @@ struct amdgpu_job { struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; + uint32_t preemption_status; uint32_t num_ibs; void *owner; bool vm_needs_flush; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a70e5a32749a..0cf7e8606fd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -44,7 +44,7 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) +void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; int i; @@ -105,7 +105,7 @@ done_free: dev->dev_private = NULL; } -static void amdgpu_register_gpu_instance(struct amdgpu_device *adev) +void amdgpu_register_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -712,7 +712,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags = 0; if (adev->flags & AMD_IS_APU) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; @@ -765,6 +765,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; + if (adev->family >= AMDGPU_FAMILY_NV) + dev_info.pa_sc_tile_steering_override = + adev->gfx.config.pa_sc_tile_steering_override; + return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; } @@ -1006,7 +1010,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, @@ -1069,7 +1073,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_bo_rmv(adev, fpriv->prt_va); - if (amdgpu_sriov_vf(adev)) { + if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); amdgpu_vm_bo_rmv(adev, fpriv->csa_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h new file mode 100644 index 000000000000..78fe49033543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_MES_H__ +#define __AMDGPU_MES_H__ + +struct amdgpu_mes_funcs; + +struct amdgpu_mes { + struct amdgpu_adev *adev; + + const struct firmware *fw; + + /* mes ucode */ + struct amdgpu_bo *ucode_fw_obj; + uint64_t ucode_fw_gpu_addr; + uint32_t *ucode_fw_ptr; + uint32_t ucode_fw_version; + uint64_t uc_start_addr; + + /* mes ucode data */ + struct amdgpu_bo *data_fw_obj; + uint64_t data_fw_gpu_addr; + uint32_t *data_fw_ptr; + uint32_t data_fw_version; + uint64_t data_start_addr; + + /* ip specific functions */ + struct amdgpu_mes_funcs *funcs; +}; + +struct mes_add_queue_input { + uint32_t process_id; + uint64_t page_table_base_addr; + uint64_t process_va_start; + uint64_t process_va_end; + uint64_t process_quantum; + uint64_t process_context_addr; + uint64_t gang_quantum; + uint64_t gang_context_addr; + uint32_t inprocess_gang_priority; + uint32_t gang_global_priority_level; + uint32_t doorbell_offset; + uint64_t mqd_addr; + uint64_t wptr_addr; + uint32_t queue_type; + uint32_t paging; +}; + +struct mes_remove_queue_input { + uint32_t doorbell_offset; + uint64_t gang_context_addr; +}; + +struct mes_suspend_gang_input { + bool suspend_all_gangs; + uint64_t gang_context_addr; + uint64_t suspend_fence_addr; + uint32_t suspend_fence_value; +}; + +struct mes_resume_gang_input { + bool resume_all_gangs; + uint64_t gang_context_addr; +}; + +struct amdgpu_mes_funcs { + int (*add_hw_queue)(struct amdgpu_mes *mes, + struct mes_add_queue_input *input); + + int (*remove_hw_queue)(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input); + + int (*suspend_gang)(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input); + + int (*resume_gang)(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input); +}; + +#endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4ff4cf5988ea..3971c201f320 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -45,49 +45,12 @@ #include <linux/firmware.h> #include <linux/module.h> -#include <linux/hmm.h> -#include <linux/interval_tree.h> - #include <drm/drm.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" /** - * struct amdgpu_mn - * - * @adev: amdgpu device pointer - * @mm: process address space - * @type: type of MMU notifier - * @work: destruction work item - * @node: hash table node to find structure by adev and mn - * @lock: rw semaphore protecting the notifier nodes - * @objects: interval tree containing amdgpu_mn_nodes - * @mirror: HMM mirror function support - * - * Data for each amdgpu device and process address space. - */ -struct amdgpu_mn { - /* constant after initialisation */ - struct amdgpu_device *adev; - struct mm_struct *mm; - enum amdgpu_mn_type type; - - /* only used on destruction */ - struct work_struct work; - - /* protected by adev->mn_lock */ - struct hlist_node node; - - /* objects protected by lock */ - struct rw_semaphore lock; - struct rb_root_cached objects; - - /* HMM mirror */ - struct hmm_mirror mirror; -}; - -/** * struct amdgpu_mn_node * * @it: interval node defining start-last of the affected address range @@ -519,7 +482,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range) range->flags = hmm_range_flags; range->values = hmm_range_values; range->pfn_shift = PAGE_SHIFT; - range->pfns = NULL; INIT_LIST_HEAD(&range->list); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index f5b67c63ed6b..b8ed68943625 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -24,17 +24,53 @@ #ifndef __AMDGPU_MN_H__ #define __AMDGPU_MN_H__ -/* - * HMM mirror - */ -struct amdgpu_mn; -struct hmm_range; +#include <linux/types.h> +#include <linux/hmm.h> +#include <linux/rwsem.h> +#include <linux/workqueue.h> +#include <linux/interval_tree.h> enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, AMDGPU_MN_TYPE_HSA, }; +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @type: type of MMU notifier + * @work: destruction work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @mirror: HMM mirror function support + * + * Data for each amdgpu device and process address space. + */ +struct amdgpu_mn { + /* constant after initialisation */ + struct amdgpu_device *adev; + struct mm_struct *mm; + enum amdgpu_mn_type type; + + /* only used on destruction */ + struct work_struct work; + + /* protected by adev->mn_lock */ + struct hlist_node node; + + /* objects protected by lock */ + struct rw_semaphore lock; + struct rb_root_cached objects; + +#ifdef CONFIG_HMM_MIRROR + /* HMM mirror */ + struct hmm_mirror mirror; +#endif +}; + #if defined(CONFIG_HMM_MIRROR) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 16f96f2e3671..bea6f298dfdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -495,7 +495,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, #endif bo->tbo.bdev = &adev->mman.bdev; - amdgpu_bo_placement_from_domain(bo, bp->domain); + if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | + AMDGPU_GEM_DOMAIN_GDS)) + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + else + amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) bo->tbo.priority = 1; @@ -975,6 +979,7 @@ static const char *amdgpu_vram_names[] = { "HBM", "DDR3", "DDR4", + "GDDR6", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c430e8259038..d60593cc436e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -155,7 +155,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); int r; - r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL); + r = __ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) dev_err(adev->dev, "%p reserve failed\n", bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 194d0c75b072..8b7efd0a7028 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -67,6 +67,9 @@ static const struct cg_flag_name clocks[] = { {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"}, {AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"}, + + {AMD_CG_SUPPORT_ATHUB_MGCG, "Address Translation Hub Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_ATHUB_LS, "Address Translation Hub Light Sleep"}, {0, NULL}, }; @@ -272,8 +275,11 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; enum amd_dpm_forced_level level = 0xff; - if ((adev->flags & AMD_IS_PX) && - (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + if (amdgpu_sriov_vf(adev)) + return 0; + + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return snprintf(buf, PAGE_SIZE, "off\n"); if (is_support_sw_smu(adev)) @@ -311,10 +317,12 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (is_support_sw_smu(adev)) - current_level = smu_get_performance_level(&adev->smu); - else if (adev->powerplay.pp_funcs->get_performance_level) - current_level = amdgpu_dpm_get_performance_level(adev); + if (!amdgpu_sriov_vf(adev)) { + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) + current_level = amdgpu_dpm_get_performance_level(adev); + } if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; @@ -365,18 +373,9 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, } if (is_support_sw_smu(adev)) { - mutex_lock(&adev->pm.mutex); - if (adev->pm.dpm.thermal_active) { - count = -EINVAL; - mutex_unlock(&adev->pm.mutex); - goto fail; - } ret = smu_force_performance_level(&adev->smu, level); if (ret) count = -EINVAL; - else - adev->pm.dpm.forced_level = level; - mutex_unlock(&adev->pm.mutex); } else if (adev->powerplay.pp_funcs->force_performance_level) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { @@ -690,12 +689,12 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, if (ret) return -EINVAL; } else { - if (adev->powerplay.pp_funcs->odn_edit_dpm_table) + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) { ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, parameter_size); - - if (ret) - return -EINVAL; + if (ret) + return -EINVAL; + } if (type == PP_OD_COMMIT_DPM_TABLE) { if (adev->powerplay.pp_funcs->dispatch_tasks) { @@ -721,10 +720,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, uint32_t size = 0; if (is_support_sw_smu(adev)) { - size = smu_print_clk_levels(&adev->smu, OD_SCLK, buf); - size += smu_print_clk_levels(&adev->smu, OD_MCLK, buf+size); - size += smu_print_clk_levels(&adev->smu, OD_VDDC_CURVE, buf+size); - size += smu_print_clk_levels(&adev->smu, OD_RANGE, buf+size); + size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); + size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); + size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); + size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); return size; } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); @@ -835,7 +834,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, return adev->virt.ops->get_pp_clk(adev, PP_SCLK, buf); if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_SCLK, buf); + return smu_print_clk_levels(&adev->smu, SMU_SCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); else @@ -888,12 +887,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_SCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -910,8 +912,12 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev) && + adev->virt.ops->get_pp_clk) + return adev->virt.ops->get_pp_clk(adev, PP_MCLK, buf); + if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_MCLK, buf); + return smu_print_clk_levels(&adev->smu, SMU_MCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); else @@ -928,12 +934,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, int ret; uint32_t mask = 0; + if (amdgpu_sriov_vf(adev)) + return 0; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_MCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -951,7 +960,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_SOCCLK, buf); + return smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf); else @@ -973,7 +982,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_SOCCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask); @@ -991,7 +1000,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_FCLK, buf); + return smu_print_clk_levels(&adev->smu, SMU_FCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf); else @@ -1013,7 +1022,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_FCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask); @@ -1031,7 +1040,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_DCEFCLK, buf); + return smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf); else @@ -1053,7 +1062,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_DCEFCLK, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask); @@ -1071,7 +1080,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) - return smu_print_clk_levels(&adev->smu, PP_PCIE, buf); + return smu_print_clk_levels(&adev->smu, SMU_PCIE, buf); else if (adev->powerplay.pp_funcs->print_clock_levels) return amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); else @@ -1093,7 +1102,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; if (is_support_sw_smu(adev)) - ret = smu_force_clk_levels(&adev->smu, PP_PCIE, mask); + ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask); else if (adev->powerplay.pp_funcs->force_clock_level) ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); @@ -1112,7 +1121,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, uint32_t value = 0; if (is_support_sw_smu(adev)) - value = smu_get_od_percentage(&(adev->smu), OD_SCLK); + value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK); else if (adev->powerplay.pp_funcs->get_sclk_od) value = amdgpu_dpm_get_sclk_od(adev); @@ -1137,7 +1146,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, } if (is_support_sw_smu(adev)) { - value = smu_set_od_percentage(&(adev->smu), OD_SCLK, (uint32_t)value); + value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value); } else { if (adev->powerplay.pp_funcs->set_sclk_od) amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); @@ -1163,7 +1172,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, uint32_t value = 0; if (is_support_sw_smu(adev)) - value = smu_get_od_percentage(&(adev->smu), OD_MCLK); + value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK); else if (adev->powerplay.pp_funcs->get_mclk_od) value = amdgpu_dpm_get_mclk_od(adev); @@ -1188,7 +1197,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, } if (is_support_sw_smu(adev)) { - value = smu_set_od_percentage(&(adev->smu), OD_MCLK, (uint32_t)value); + value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value); } else { if (adev->powerplay.pp_funcs->set_mclk_od) amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); @@ -1453,7 +1462,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; int channel = to_sensor_dev_attr(attr)->index; - int r, temp, size = sizeof(temp); + int r, temp = 0, size = sizeof(temp); /* Can't get temperature when the card is off */ if ((adev->flags & AMD_IS_PX) && @@ -2068,11 +2077,6 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&sclk, &size); @@ -2103,11 +2107,6 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - /* sanity check PP is enabled */ - if (!(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->read_sensor)) - return -EINVAL; - /* get the sclk */ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&mclk, &size); @@ -2701,6 +2700,44 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev) } +int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) + return ret; + + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_sclk\n"); + return ret; + } + + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_mclk\n"); + return ret; + } + + ret = device_create_file(adev->dev, &dev_attr_power_dpm_force_performance_level); + if (ret) { + DRM_ERROR("failed to create device file for dpm state\n"); + return ret; + } + + return ret; +} + +void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev) +{ + if (!(amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))) + return; + + device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); + device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); + device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); +} + int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version) { int r; @@ -2839,7 +2876,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } /* APU does not have its own dedicated memory */ - if (!(adev->flags & AMD_IS_APU)) { + if (!(adev->flags & AMD_IS_APU) && + (adev->asic_type != CHIP_VEGA10)) { ret = device_create_file(adev->dev, &dev_attr_mem_busy_percent); if (ret) { @@ -2919,7 +2957,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU) && + (adev->asic_type != CHIP_VEGA10)) device_remove_file(adev->dev, &dev_attr_mem_busy_percent); if (!(adev->flags & AMD_IS_APU)) device_remove_file(adev->dev, &dev_attr_pcie_bw); @@ -2947,13 +2986,10 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } if (is_support_sw_smu(adev)) { - struct smu_context *smu = &adev->smu; struct smu_dpm_context *smu_dpm = &adev->smu.smu_dpm; - mutex_lock(&(smu->mutex)); smu_handle_task(&adev->smu, smu_dpm->dpm_level, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE); - mutex_unlock(&(smu->mutex)); } else { if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index 7ff0e7621fff..ef31448ee8d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -32,7 +32,9 @@ struct cg_flag_name void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); +int amdgpu_pm_virt_sysfs_init(struct amdgpu_device *adev); void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev); +void amdgpu_pm_virt_sysfs_fini(struct amdgpu_device *adev); void amdgpu_pm_print_power_states(struct amdgpu_device *adev); int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version); void amdgpu_pm_compute_clocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c new file mode 100644 index 000000000000..0e6dba9f60f0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -0,0 +1,280 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Jonathan Kim <jonathan.kim@amd.com> + * + */ + +#include <linux/perf_event.h> +#include <linux/init.h> +#include "amdgpu.h" +#include "amdgpu_pmu.h" +#include "df_v3_6.h" + +#define PMU_NAME_SIZE 32 + +/* record to keep track of pmu entry per pmu type per device */ +struct amdgpu_pmu_entry { + struct list_head entry; + struct amdgpu_device *adev; + struct pmu pmu; + unsigned int pmu_perf_type; +}; + +static LIST_HEAD(amdgpu_pmu_list); + + +/* initialize perf counter */ +static int amdgpu_perf_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* test the event attr type check for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* update the hw_perf_event struct with config data */ + hwc->conf = event->attr.config; + + return 0; +} + +/* start perf counter */ +static void amdgpu_perf_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + if (!(flags & PERF_EF_RELOAD)) + pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + + pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 0); + break; + default: + break; + } + + perf_event_update_userpage(event); + +} + +/* read perf counter */ +static void amdgpu_perf_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + u64 count, prev; + + do { + prev = local64_read(&hwc->prev_count); + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_get_count(pe->adev, hwc->conf, + &count); + break; + default: + count = 0; + break; + }; + } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); + + local64_add(count - prev, &event->count); +} + +/* stop perf counter */ +static void amdgpu_perf_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + if (hwc->state & PERF_HES_UPTODATE) + return; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 0); + break; + default: + break; + }; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + amdgpu_perf_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +/* add perf counter */ +static int amdgpu_perf_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int retval; + + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + retval = pe->adev->df_funcs->pmc_start(pe->adev, hwc->conf, 1); + break; + default: + return 0; + }; + + if (retval) + return retval; + + if (flags & PERF_EF_START) + amdgpu_perf_start(event, PERF_EF_RELOAD); + + return retval; + +} + +/* delete perf counter */ +static void amdgpu_perf_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct amdgpu_pmu_entry *pe = container_of(event->pmu, + struct amdgpu_pmu_entry, + pmu); + + amdgpu_perf_stop(event, PERF_EF_UPDATE); + + switch (pe->pmu_perf_type) { + case PERF_TYPE_AMDGPU_DF: + pe->adev->df_funcs->pmc_stop(pe->adev, hwc->conf, 1); + break; + default: + break; + }; + + perf_event_update_userpage(event); +} + +/* vega20 pmus */ + +/* init pmu tracking per pmu type */ +static int init_pmu_by_type(struct amdgpu_device *adev, + const struct attribute_group *attr_groups[], + char *pmu_type_name, char *pmu_file_prefix, + unsigned int pmu_perf_type, + unsigned int num_counters) +{ + char pmu_name[PMU_NAME_SIZE]; + struct amdgpu_pmu_entry *pmu_entry; + int ret = 0; + + pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); + + if (!pmu_entry) + return -ENOMEM; + + pmu_entry->adev = adev; + pmu_entry->pmu = (struct pmu){ + .event_init = amdgpu_perf_event_init, + .add = amdgpu_perf_add, + .del = amdgpu_perf_del, + .start = amdgpu_perf_start, + .stop = amdgpu_perf_stop, + .read = amdgpu_perf_read, + .task_ctx_nr = perf_invalid_context, + }; + + pmu_entry->pmu.attr_groups = attr_groups; + pmu_entry->pmu_perf_type = pmu_perf_type; + snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", + pmu_file_prefix, adev->ddev->primary->index); + + ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1); + + if (ret) { + kfree(pmu_entry); + pr_warn("Error initializing AMDGPU %s PMUs.\n", pmu_type_name); + return ret; + } + + pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n", + pmu_type_name, num_counters); + + list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list); + + return 0; +} + +/* init amdgpu_pmu */ +int amdgpu_pmu_init(struct amdgpu_device *adev) +{ + int ret = 0; + + switch (adev->asic_type) { + case CHIP_VEGA20: + /* init df */ + ret = init_pmu_by_type(adev, df_v3_6_attr_groups, + "DF", "amdgpu_df", PERF_TYPE_AMDGPU_DF, + DF_V3_6_MAX_COUNTERS); + + /* other pmu types go here*/ + break; + default: + return 0; + } + + return 0; +} + + +/* destroy all pmu data associated with target device */ +void amdgpu_pmu_fini(struct amdgpu_device *adev) +{ + struct amdgpu_pmu_entry *pe, *temp; + + list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { + if (pe->adev == adev) { + list_del(&pe->entry); + perf_pmu_unregister(&pe->pmu); + kfree(pe); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h new file mode 100644 index 000000000000..7dddb7160a11 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h @@ -0,0 +1,37 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Jonathan Kim <jonathan.kim@amd.com> + * + */ + +#ifndef _AMDGPU_PMU_H_ +#define _AMDGPU_PMU_H_ + +enum amdgpu_pmu_perf_type { + PERF_TYPE_AMDGPU_DF = 0, + PERF_TYPE_AMDGPU_MAX +}; + +int amdgpu_pmu_init(struct amdgpu_device *adev); +void amdgpu_pmu_fini(struct amdgpu_device *adev); + +#endif /* _AMDGPU_PMU_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 909be1bf2294..c027e5e7713e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -46,12 +46,19 @@ static int psp_early_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: psp_v3_1_set_psp_funcs(psp); + psp->autoload_supported = false; break; case CHIP_RAVEN: psp_v10_0_set_psp_funcs(psp); + psp->autoload_supported = false; break; case CHIP_VEGA20: psp_v11_0_set_psp_funcs(psp); + psp->autoload_supported = false; + break; + case CHIP_NAVI10: + psp_v11_0_set_psp_funcs(psp); + psp->autoload_supported = true; break; default: return -EINVAL; @@ -123,6 +130,8 @@ psp_cmd_submit_buf(struct psp_context *psp, int index; int timeout = 2000; + mutex_lock(&psp->mutex); + memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); @@ -132,6 +141,7 @@ psp_cmd_submit_buf(struct psp_context *psp, fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); + mutex_unlock(&psp->mutex); return ret; } @@ -154,8 +164,10 @@ psp_cmd_submit_buf(struct psp_context *psp, ucode->ucode_id); DRM_WARN("psp command failed and response status is (%d)\n", psp->cmd_buf_mem->resp.status); - if (!timeout) + if (!timeout) { + mutex_unlock(&psp->mutex); return -EINVAL; + } } /* get xGMI session id from response buffer */ @@ -165,6 +177,7 @@ psp_cmd_submit_buf(struct psp_context *psp, ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; } + mutex_unlock(&psp->mutex); return ret; } @@ -182,10 +195,44 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp, cmd->cmd.cmd_setup_tmr.buf_size = size; } +static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t pri_buf_mc, uint32_t size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TOC; + cmd->cmd.cmd_load_toc.toc_phy_addr_lo = lower_32_bits(pri_buf_mc); + cmd->cmd.cmd_load_toc.toc_phy_addr_hi = upper_32_bits(pri_buf_mc); + cmd->cmd.cmd_load_toc.toc_size = size; +} + +/* Issue LOAD TOC cmd to PSP to part toc and calculate tmr size needed */ +static int psp_load_toc(struct psp_context *psp, + uint32_t *tmr_size) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + /* Copy toc to psp firmware private buffer */ + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->toc_start_addr, psp->toc_bin_size); + + psp_prep_load_toc_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->toc_bin_size); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + if (!ret) + *tmr_size = psp->cmd_buf_mem->resp.tmr_size; + kfree(cmd); + return ret; +} + /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { int ret; + int tmr_size; /* * According to HW engineer, they prefer the TMR address be "naturally @@ -194,7 +241,21 @@ static int psp_tmr_init(struct psp_context *psp) * Note: this memory need be reserved till the driver * uninitializes. */ - ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, PSP_TMR_SIZE, + tmr_size = PSP_TMR_SIZE; + + /* For ASICs support RLC autoload, psp will parse the toc + * and calculate the total size of TMR needed */ + if (psp->toc_start_addr && + psp->toc_bin_size && + psp->fw_pri_buf) { + ret = psp_load_toc(psp, &tmr_size); + if (ret) { + DRM_ERROR("Failed to load toc\n"); + return ret; + } + } + + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -210,9 +271,10 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); - DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", - PSP_TMR_SIZE, psp->tmr_mc_addr); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, + amdgpu_bo_size(psp->tmr_bo)); + DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); @@ -707,6 +769,15 @@ static int psp_hw_start(struct psp_context *psp) int ret; if (!amdgpu_sriov_vf(adev) || !adev->in_gpu_reset) { + if (psp->kdb_bin_size && + (psp->funcs->bootloader_load_kdb != NULL)) { + ret = psp_bootloader_load_kdb(psp); + if (ret) { + DRM_ERROR("PSP load kdb failed!\n"); + return ret; + } + } + ret = psp_bootloader_load_sysdrv(psp); if (ret) { DRM_ERROR("PSP load sysdrv failed!\n"); @@ -726,12 +797,24 @@ static int psp_hw_start(struct psp_context *psp) return ret; } + ret = psp_tmr_init(psp); + if (ret) { + DRM_ERROR("PSP tmr init failed!\n"); + return ret; + } + ret = psp_tmr_load(psp); if (ret) { DRM_ERROR("PSP load tmr failed!\n"); return ret; } + ret = psp_asd_init(psp); + if (ret) { + DRM_ERROR("PSP asd init failed!\n"); + return ret; + } + ret = psp_asd_load(psp); if (ret) { DRM_ERROR("PSP load asd failed!\n"); @@ -823,6 +906,12 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_DMCU_INTV: *type = GFX_FW_TYPE_DMCU_ISR; break; + case AMDGPU_UCODE_ID_VCN0_RAM: + *type = GFX_FW_TYPE_VCN0_RAM; + break; + case AMDGPU_UCODE_ID_VCN1_RAM: + *type = GFX_FW_TYPE_VCN1_RAM; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -851,19 +940,45 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } +static int psp_execute_np_fw_load(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) +{ + int ret = 0; + + ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); + if (ret) + return ret; + + ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, + psp->fence_buf_mc_addr); + + return ret; +} + static int psp_np_fw_load(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; struct amdgpu_device* adev = psp->adev; + if (psp->autoload_supported) { + ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; + if (!ucode->fw) + goto out; + + ret = psp_execute_np_fw_load(psp, ucode); + if (ret) + return ret; + } + +out: for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (!ucode->fw) continue; if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && - psp_smu_reload_quirk(psp)) + (psp_smu_reload_quirk(psp) || psp->autoload_supported)) continue; if (amdgpu_sriov_vf(adev) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 @@ -871,16 +986,24 @@ static int psp_np_fw_load(struct psp_context *psp) || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) /*skip ucode loading in SRIOV VF */ continue; + if (psp->autoload_supported && + (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) + /* skip mec JT when autoload is enabled */ + continue; - ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd); - if (ret) - return ret; - - ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr); + ret = psp_execute_np_fw_load(psp, ucode); if (ret) return ret; + /* Start rlc autoload after psp recieved all the gfx firmware */ + if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ret = psp_rlc_autoload(psp); + if (ret) { + DRM_ERROR("Failed to start rlc autoload\n"); + return ret; + } + } #if 0 /* check if firmware loaded sucessfully */ if (!amdgpu_psp_check_fw_loading_status(adev, i)) @@ -939,18 +1062,6 @@ static int psp_load_fw(struct amdgpu_device *adev) goto failed; } - ret = psp_tmr_init(psp); - if (ret) { - DRM_ERROR("PSP tmr init failed!\n"); - goto failed; - } - - ret = psp_asd_init(psp); - if (ret) { - DRM_ERROR("PSP asd init failed!\n"); - goto failed; - } - skip_memalloc: ret = psp_hw_start(psp); if (ret) @@ -1092,10 +1203,49 @@ failed: int psp_gpu_reset(struct amdgpu_device *adev) { + int ret; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; - return psp_mode1_reset(&adev->psp); + mutex_lock(&adev->psp.mutex); + ret = psp_mode1_reset(&adev->psp); + mutex_unlock(&adev->psp.mutex); + + return ret; +} + +int psp_rlc_autoload_start(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->cmd_id = GFX_CMD_ID_AUTOLOAD_RLC; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + kfree(cmd); + return ret; +} + +int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, + uint64_t cmd_gpu_addr, int cmd_size) +{ + struct amdgpu_firmware_info ucode = {0}; + + ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : + AMDGPU_UCODE_ID_VCN0_RAM; + ucode.mc_addr = cmd_gpu_addr; + ucode.ucode_size = cmd_size; + + return psp_execute_np_fw_load(&adev->psp, &ucode); } static bool psp_check_fw_loading_status(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index cf49539b0b07..e0fc2a790e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -42,6 +42,12 @@ struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; +enum psp_bootloader_cmd { + PSP_BL__LOAD_SYSDRV = 0x10000, + PSP_BL__LOAD_SOSDRV = 0x20000, + PSP_BL__LOAD_KEY_DATABASE = 0x80000, +}; + enum psp_ring_type { PSP_RING_TYPE__INVALID = 0, @@ -73,6 +79,7 @@ enum psp_reg_prog_id { struct psp_funcs { int (*init_microcode)(struct psp_context *psp); + int (*bootloader_load_kdb)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -101,6 +108,7 @@ struct psp_funcs int (*ras_trigger_error)(struct psp_context *psp, struct ta_ras_trigger_error_input *info); int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr); + int (*rlc_autoload_start)(struct psp_context *psp); }; #define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 @@ -154,8 +162,12 @@ struct psp_context uint32_t sos_feature_version; uint32_t sys_bin_size; uint32_t sos_bin_size; + uint32_t toc_bin_size; + uint32_t kdb_bin_size; uint8_t *sys_start_addr; uint8_t *sos_start_addr; + uint8_t *toc_start_addr; + uint8_t *kdb_start_addr; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -184,6 +196,8 @@ struct psp_context /* fence value associated with cmd buffer */ atomic_t fence_value; + /* flag to mark whether gfx fw autoload is supported or not */ + bool autoload_supported; /* xgmi ta firmware and buffer */ const struct firmware *ta_fw; @@ -196,6 +210,7 @@ struct psp_context uint8_t *ta_ras_start_addr; struct psp_xgmi_context xgmi_context; struct psp_ras_context ras; + struct mutex mutex; }; struct amdgpu_psp_funcs { @@ -214,6 +229,8 @@ struct amdgpu_psp_funcs { (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0) +#define psp_bootloader_load_kdb(psp) \ + ((psp)->funcs->bootloader_load_kdb ? (psp)->funcs->bootloader_load_kdb((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ @@ -234,6 +251,8 @@ struct amdgpu_psp_funcs { #define psp_xgmi_set_topology_info(psp, num_device, topology) \ ((psp)->funcs->xgmi_set_topology_info ? \ (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) +#define psp_rlc_autoload(psp) \ + ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) @@ -253,12 +272,17 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); +int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, + uint64_t cmd_gpu_addr, int cmd_size); + int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); +int psp_rlc_autoload_start(struct psp_context *psp); + extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, uint32_t value); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4d387557cc37..1a4412e47810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -335,12 +335,13 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * case 2: ret = amdgpu_ras_reserve_vram(adev, data.inject.address, PAGE_SIZE, &bo); - /* This address might be used already on failure. In fact we can - * perform an injection in such case. - */ - if (ret) - break; - data.inject.address = amdgpu_bo_gpu_offset(bo); + if (ret) { + /* address was offset, now it is absolute.*/ + data.inject.address += adev->gmc.vram_start; + if (data.inject.address > adev->gmc.vram_end) + break; + } else + data.inject.address = amdgpu_bo_gpu_offset(bo); ret = amdgpu_ras_error_inject(adev, &data.inject); amdgpu_ras_release_vram(adev, &bo); break; @@ -969,40 +970,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) /* sysfs end */ /* debugfs begin */ -static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) +static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct drm_minor *minor = adev->ddev->primary; - struct dentry *root = minor->debugfs_root, *dir; - struct dentry *ent; - - dir = debugfs_create_dir("ras", root); - if (IS_ERR(dir)) - return -EINVAL; - - con->dir = dir; - ent = debugfs_create_file("ras_ctrl", - S_IWUGO | S_IRUGO, con->dir, - adev, &amdgpu_ras_debugfs_ctrl_ops); - if (IS_ERR(ent)) { - debugfs_remove(con->dir); - return -EINVAL; - } - - con->ent = ent; - return 0; + con->dir = debugfs_create_dir("ras", minor->debugfs_root); + con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir, + adev, &amdgpu_ras_debugfs_ctrl_ops); } -int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); - struct dentry *ent; if (!obj || obj->ent) - return -EINVAL; + return; get_obj(obj); @@ -1010,34 +995,25 @@ int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, head->debugfs_name, sizeof(obj->fs_data.debugfs_name)); - ent = debugfs_create_file(obj->fs_data.debugfs_name, - S_IWUGO | S_IRUGO, con->dir, - obj, &amdgpu_ras_debugfs_ops); - - if (IS_ERR(ent)) - return -EINVAL; - - obj->ent = ent; - - return 0; + obj->ent = debugfs_create_file(obj->fs_data.debugfs_name, + S_IWUGO | S_IRUGO, con->dir, obj, + &amdgpu_ras_debugfs_ops); } -int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); if (!obj || !obj->ent) - return 0; + return; debugfs_remove(obj->ent); obj->ent = NULL; put_obj(obj); - - return 0; } -static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) +static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1050,8 +1026,6 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) debugfs_remove(con->dir); con->dir = NULL; con->ent = NULL; - - return 0; } /* debugfs end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 94c652f5265a..b2841195bd3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -276,10 +276,10 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); -int amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head); -int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); int amdgpu_ras_error_query(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ee440fe29b91..e5c83e164d82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -282,6 +282,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } + ring->trail_fence_gpu_addr = + adev->wb.gpu_addr + (ring->trail_fence_offs * 4); + ring->trail_fence_cpu_addr = &adev->wb.wb[ring->trail_fence_offs]; + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); if (r) { dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); @@ -400,7 +410,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!ring->funcs->soft_recovery || !fence) + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; atomic_inc(&ring->adev->gpu_reset_counter); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index cdddce938bf5..4410c97ac9b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,8 +29,8 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 23 -#define AMDGPU_MAX_GFX_RINGS 1 +#define AMDGPU_MAX_RINGS 24 +#define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 @@ -172,6 +172,7 @@ struct amdgpu_ring_funcs { enum drm_sched_priority priority); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); + int (*preempt_ib)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -206,6 +207,10 @@ struct amdgpu_ring { unsigned fence_offs; uint64_t current_ctx; char name[16]; + u32 trail_seq; + unsigned trail_fence_offs; + u64 trail_fence_gpu_addr; + volatile u32 *trail_fence_cpu_addr; unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; @@ -246,6 +251,7 @@ struct amdgpu_ring { #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); @@ -266,6 +272,12 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, struct dma_fence *fence); +static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring, + bool cond_exec) +{ + *ring->cond_exe_cpu_addr = cond_exec; +} + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 49a8ab52113b..d3d4707f2168 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -26,6 +26,94 @@ #include "clearstate_defs.h" +/* firmware ID used in rlc toc */ +typedef enum _FIRMWARE_ID_ { + FIRMWARE_ID_INVALID = 0, + FIRMWARE_ID_RLC_G_UCODE = 1, + FIRMWARE_ID_RLC_TOC = 2, + FIRMWARE_ID_RLCG_SCRATCH = 3, + FIRMWARE_ID_RLC_SRM_ARAM = 4, + FIRMWARE_ID_RLC_SRM_INDEX_ADDR = 5, + FIRMWARE_ID_RLC_SRM_INDEX_DATA = 6, + FIRMWARE_ID_RLC_P_UCODE = 7, + FIRMWARE_ID_RLC_V_UCODE = 8, + FIRMWARE_ID_RLX6_UCODE = 9, + FIRMWARE_ID_RLX6_DRAM_BOOT = 10, + FIRMWARE_ID_GLOBAL_TAP_DELAYS = 11, + FIRMWARE_ID_SE0_TAP_DELAYS = 12, + FIRMWARE_ID_SE1_TAP_DELAYS = 13, + FIRMWARE_ID_GLOBAL_SE0_SE1_SKEW_DELAYS = 14, + FIRMWARE_ID_SDMA0_UCODE = 15, + FIRMWARE_ID_SDMA0_JT = 16, + FIRMWARE_ID_SDMA1_UCODE = 17, + FIRMWARE_ID_SDMA1_JT = 18, + FIRMWARE_ID_CP_CE = 19, + FIRMWARE_ID_CP_PFP = 20, + FIRMWARE_ID_CP_ME = 21, + FIRMWARE_ID_CP_MEC = 22, + FIRMWARE_ID_CP_MES = 23, + FIRMWARE_ID_MES_STACK = 24, + FIRMWARE_ID_RLC_SRM_DRAM_SR = 25, + FIRMWARE_ID_RLCG_SCRATCH_SR = 26, + FIRMWARE_ID_RLCP_SCRATCH_SR = 27, + FIRMWARE_ID_RLCV_SCRATCH_SR = 28, + FIRMWARE_ID_RLX6_DRAM_SR = 29, + FIRMWARE_ID_SDMA0_PG_CONTEXT = 30, + FIRMWARE_ID_SDMA1_PG_CONTEXT = 31, + FIRMWARE_ID_GLOBAL_MUX_SELECT_RAM = 32, + FIRMWARE_ID_SE0_MUX_SELECT_RAM = 33, + FIRMWARE_ID_SE1_MUX_SELECT_RAM = 34, + FIRMWARE_ID_ACCUM_CTRL_RAM = 35, + FIRMWARE_ID_RLCP_CAM = 36, + FIRMWARE_ID_RLC_SPP_CAM_EXT = 37, + FIRMWARE_ID_MAX = 38, +} FIRMWARE_ID; + +typedef struct _RLC_TABLE_OF_CONTENT { + union { + unsigned int DW0; + struct { + unsigned int offset : 25; + unsigned int id : 7; + }; + }; + + union { + unsigned int DW1; + struct { + unsigned int load_at_boot : 1; + unsigned int load_at_vddgfx : 1; + unsigned int load_at_reset : 1; + unsigned int memory_destination : 2; + unsigned int vfflr_image_code : 4; + unsigned int load_mode_direct : 1; + unsigned int save_for_vddgfx : 1; + unsigned int save_for_vfflr : 1; + unsigned int reserved : 1; + unsigned int signed_source : 1; + unsigned int size : 18; + }; + }; + + union { + unsigned int DW2; + struct { + unsigned int indirect_addr_reg : 16; + unsigned int index : 16; + }; + }; + + union { + unsigned int DW3; + struct { + unsigned int indirect_data_reg : 16; + unsigned int indirect_start_offset : 16; + }; + }; +} RLC_TABLE_OF_CONTENT; + +#define RLC_TOC_MAX_SIZE 64 + struct amdgpu_rlc_funcs { bool (*is_rlc_enabled)(struct amdgpu_device *adev); void (*set_safe_mode)(struct amdgpu_device *adev); @@ -85,6 +173,16 @@ struct amdgpu_rlc { u8 *save_restore_list_srm; bool is_rlc_v2_1; + + /* for rlc autoload */ + struct amdgpu_bo *rlc_autoload_bo; + u64 rlc_autoload_gpu_addr; + void *rlc_autoload_ptr; + + /* rlc toc buffer */ + struct amdgpu_bo *rlc_toc_bo; + uint64_t rlc_toc_gpu_addr; + void *rlc_toc_buf; }; void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index a425329d1897..5c13c503e61f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -24,6 +24,10 @@ #include "amdgpu.h" #include "amdgpu_sdma.h" +#define AMDGPU_CSA_SDMA_SIZE 64 +/* SDMA CSA reside in the 3rd page of CSA */ +#define AMDGPU_CSA_SDMA_OFFSET (4096 * 2) + /* * GPU SDMA IP block helpers function. */ @@ -56,3 +60,26 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) return -EINVAL; } + +uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, + unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint64_t csa_mc_addr; + uint32_t index = 0; + int r; + + if (vmid == 0 || !amdgpu_mcbp) + return 0; + + r = amdgpu_sdma_get_index_from_ring(ring, &index); + + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; + + return csa_mc_addr; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 1ba9ba3b54f7..35dd152f9d5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -97,5 +97,5 @@ struct amdgpu_buffer_funcs { struct amdgpu_sdma_instance * amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); - +uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h new file mode 100644 index 000000000000..f4176cb01790 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_socbb.h @@ -0,0 +1,82 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_SOCBB_H__ +#define __AMDGPU_SOCBB_H__ + +struct gpu_info_voltage_scaling_v1_0 { + uint32_t state; + uint32_t dscclk_mhz; + uint32_t dcfclk_mhz; + uint32_t socclk_mhz; + uint32_t dram_speed_mts; + uint32_t fabricclk_mhz; + uint32_t dispclk_mhz; + uint32_t phyclk_mhz; + uint32_t dppclk_mhz; +}; + +struct gpu_info_soc_bounding_box_v1_0 { + uint32_t sr_exit_time_us; + uint32_t sr_enter_plus_exit_time_us; + uint32_t urgent_latency_us; + uint32_t urgent_latency_pixel_data_only_us; + uint32_t urgent_latency_pixel_mixed_with_vm_data_us; + uint32_t urgent_latency_vm_data_only_us; + uint32_t writeback_latency_us; + uint32_t ideal_dram_bw_after_urgent_percent; + uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly + uint32_t pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm; + uint32_t pct_ideal_dram_sdp_bw_after_urgent_vm_only; + uint32_t max_avg_sdp_bw_use_normal_percent; + uint32_t max_avg_dram_bw_use_normal_percent; + uint32_t max_request_size_bytes; + uint32_t downspread_percent; + uint32_t dram_page_open_time_ns; + uint32_t dram_rw_turnaround_time_ns; + uint32_t dram_return_buffer_per_channel_bytes; + uint32_t dram_channel_width_bytes; + uint32_t fabric_datapath_to_dcn_data_return_bytes; + uint32_t dcn_downspread_percent; + uint32_t dispclk_dppclk_vco_speed_mhz; + uint32_t dfs_vco_period_ps; + uint32_t urgent_out_of_order_return_per_channel_pixel_only_bytes; + uint32_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + uint32_t urgent_out_of_order_return_per_channel_vm_only_bytes; + uint32_t round_trip_ping_latency_dcfclk_cycles; + uint32_t urgent_out_of_order_return_per_channel_bytes; + uint32_t channel_interleave_bytes; + uint32_t num_banks; + uint32_t num_chans; + uint32_t vmm_page_size_bytes; + uint32_t dram_clock_change_latency_us; + uint32_t writeback_dram_clock_change_latency_us; + uint32_t return_bus_width_bytes; + uint32_t voltage_override; + uint32_t xfc_bus_transport_time_us; + uint32_t xfc_xbuf_latency_tolerance_us; + uint32_t use_urgent_burst_bw; + uint32_t num_states; + struct gpu_info_voltage_scaling_v1_0 clock_limits[8]; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d81bebf76310..e51b48ac48eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -391,6 +391,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, src->mem); src_node_size = (src_mm->size << PAGE_SHIFT); + src_page_offset = 0; } else { src_node_start += cur_size; src_page_offset = src_node_start & (PAGE_SIZE - 1); @@ -400,6 +401,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, dst->mem); dst_node_size = (dst_mm->size << PAGE_SHIFT); + dst_page_offset = 0; } else { dst_node_start += cur_size; dst_page_offset = dst_node_start & (PAGE_SIZE - 1); @@ -487,6 +489,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit from VRAM\n"); return r; } @@ -545,6 +548,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit to VRAM\n"); return r; } @@ -565,6 +569,30 @@ out_cleanup: } /** + * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_mem_visible(struct amdgpu_device *adev, + struct ttm_mem_reg *mem) +{ + struct drm_mm_node *nodes = mem->mm_node; + + if (mem->mem_type == TTM_PL_SYSTEM || + mem->mem_type == TTM_PL_TT) + return true; + if (mem->mem_type != TTM_PL_VRAM) + return false; + + /* ttm_mem_reg_ioremap only supports contiguous memory */ + if (nodes->size != mem->num_pages) + return false; + + return ((nodes->start + nodes->size) << PAGE_SHIFT) + <= adev->gmc.visible_vram_size; +} + +/** * amdgpu_bo_move - Move a buffer object to a new memory location * * Called by ttm_bo_handle_move_mem() @@ -608,8 +636,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return 0; } - if (!adev->mman.buffer_funcs_enabled) + if (!adev->mman.buffer_funcs_enabled) { + r = -ENODEV; goto memcpy; + } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { @@ -624,10 +654,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, ctx, new_mem); - if (r) { + /* Check that all memory is CPU accessible */ + if (!amdgpu_mem_visible(adev, old_mem) || + !amdgpu_mem_visible(adev, new_mem)) { + pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } + + r = ttm_bo_move_memcpy(bo, ctx, new_mem); + if (r) + return r; } if (bo->type == ttm_bo_type_device && @@ -716,8 +752,7 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range *ranges; - int nr_ranges; + struct hmm_range *range; #endif }; @@ -730,57 +765,44 @@ struct amdgpu_ttm_tt { */ #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -/* Support Userptr pages cross max 16 vmas */ -#define MAX_NR_VMAS (16) +#define MAX_RETRY_HMM_RANGE_FAULT 16 -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { + struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; + struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; unsigned long start = gtt->userptr; - unsigned long end = start + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; - struct hmm_range *ranges; - unsigned long nr_pages, i; - uint64_t *pfns, f; + struct vm_area_struct *vma; + struct hmm_range *range; + unsigned long i; + uint64_t *pfns; + int retry = 0; int r = 0; if (!mm) /* Happens during process shutdown */ return -ESRCH; - down_read(&mm->mmap_sem); - - /* user pages may cross multiple VMAs */ - gtt->nr_ranges = 0; - do { - unsigned long vm_start; - - if (gtt->nr_ranges >= MAX_NR_VMAS) { - DRM_ERROR("Too many VMAs in userptr range\n"); - r = -EFAULT; - goto out; - } - - vm_start = vma ? vma->vm_end : start; - vma = find_vma(mm, vm_start); - if (unlikely(!vma || vm_start < vma->vm_start)) { - r = -EFAULT; - goto out; - } - vmas[gtt->nr_ranges++] = vma; - } while (end > vma->vm_end); - - DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", - start, gtt->nr_ranges, ttm->num_pages); + if (unlikely(!mirror)) { + DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); + r = -EFAULT; + goto out; + } + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out; + } if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - vmas[0]->vm_file)) { + vma->vm_file)) { r = -EPERM; goto out; } - ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); - if (unlikely(!ranges)) { + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (unlikely(!range)) { r = -ENOMEM; goto out; } @@ -791,61 +813,67 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) goto out_free_ranges; } - for (i = 0; i < gtt->nr_ranges; i++) - amdgpu_hmm_init_range(&ranges[i]); + amdgpu_hmm_init_range(range); + range->default_flags = range->flags[HMM_PFN_VALID]; + range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : range->flags[HMM_PFN_WRITE]; + range->pfn_flags_mask = 0; + range->pfns = pfns; + hmm_range_register(range, mirror, start, + start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); - f = ranges[0].flags[HMM_PFN_VALID]; - f |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : ranges[0].flags[HMM_PFN_WRITE]; - memset64(pfns, f, ttm->num_pages); +retry: + /* + * Just wait for range to be valid, safe to ignore return value as we + * will use the return value of hmm_range_fault() below under the + * mmap_sem to ascertain the validity of the range. + */ + hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); - for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { - ranges[i].vma = vmas[i]; - ranges[i].start = max(start, vmas[i]->vm_start); - ranges[i].end = min(end, vmas[i]->vm_end); - ranges[i].pfns = pfns + nr_pages; - nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; + down_read(&mm->mmap_sem); - r = hmm_vma_fault(&ranges[i], true); - if (unlikely(r)) - break; - } - if (unlikely(r)) { - while (i--) - hmm_vma_range_done(&ranges[i]); + r = hmm_range_fault(range, true); + if (unlikely(r < 0)) { + if (likely(r == -EAGAIN)) { + /* + * return -EAGAIN, mmap_sem is dropped + */ + if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) + goto retry; + else + pr_err("Retry hmm fault too many times\n"); + } - goto out_free_pfns; + goto out_up_read; } up_read(&mm->mmap_sem); for (i = 0; i < ttm->num_pages; i++) { - pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); - if (!pages[i]) { + pages[i] = hmm_device_entry_to_page(range, pfns[i]); + if (unlikely(!pages[i])) { pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", i, pfns[i]); - goto out_invalid_pfn; + r = -ENOMEM; + + goto out_free_pfns; } } - gtt->ranges = ranges; + + gtt->range = range; return 0; +out_up_read: + if (likely(r != -EAGAIN)) + up_read(&mm->mmap_sem); out_free_pfns: + hmm_range_unregister(range); kvfree(pfns); out_free_ranges: - kvfree(ranges); + kfree(range); out: - up_read(&mm->mmap_sem); - return r; - -out_invalid_pfn: - for (i = 0; i < gtt->nr_ranges; i++) - hmm_vma_range_done(&ranges[i]); - kvfree(pfns); - kvfree(ranges); - return -ENOMEM; } /** @@ -858,23 +886,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; bool r = false; - int i; if (!gtt || !gtt->userptr) return false; - DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", - gtt->userptr, gtt->nr_ranges, ttm->num_pages); + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", + gtt->userptr, ttm->num_pages); - WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, + WARN_ONCE(!gtt->range || !gtt->range->pfns, "No user pages to check\n"); - if (gtt->ranges) { - for (i = 0; i < gtt->nr_ranges; i++) - r |= hmm_vma_range_done(>t->ranges[i]); - kvfree(gtt->ranges[0].pfns); - kvfree(gtt->ranges); - gtt->ranges = NULL; + if (gtt->range) { + r = hmm_range_valid(gtt->range); + hmm_range_unregister(gtt->range); + + kvfree(gtt->range->pfns); + kfree(gtt->range); + gtt->range = NULL; } return r; @@ -958,9 +986,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->ranges && - ttm->pages[0] == hmm_pfn_to_page(>t->ranges[0], - gtt->ranges[0].pfns[0])) + if (gtt->range && + ttm->pages[0] == hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[0])) WARN_ONCE(1, "Missing get_user_page_done\n"); #endif } @@ -983,8 +1011,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, goto gart_bind_fail; /* Patch mtype of the second part BO */ - flags &= ~AMDGPU_PTE_MTYPE_MASK; - flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); r = amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), @@ -2067,9 +2095,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; num_loops = 0; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; - num_loops += DIV_ROUND_UP(byte_count, max_bytes); + num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes); num_pages -= mm_node->size; ++mm_node; } @@ -2095,12 +2123,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { - uint32_t cur_size_in_bytes = min(byte_count, max_bytes); + uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count, + max_bytes); amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, cur_size_in_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index c2b7669004ba..caa76c693700 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -102,10 +102,11 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); #else -static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, + struct page **pages) { return -EPERM; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 524f70f2b52f..bfaa0eac3213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -77,6 +77,14 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) container_of(hdr, struct smc_firmware_header_v1_0, header); DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr)); + } else if (version_major == 2) { + const struct smc_firmware_header_v1_0 *v1_hdr = + container_of(hdr, struct smc_firmware_header_v1_0, header); + const struct smc_firmware_header_v2_0 *v2_hdr = + container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); + + DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); + DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } @@ -227,6 +235,46 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) } } +void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr) +{ + uint16_t version_major = le16_to_cpu(hdr->header_version_major); + uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + + DRM_DEBUG("PSP\n"); + amdgpu_ucode_print_common_hdr(hdr); + + if (version_major == 1) { + const struct psp_firmware_header_v1_0 *psp_hdr = + container_of(hdr, struct psp_firmware_header_v1_0, header); + + DRM_DEBUG("ucode_feature_version: %u\n", + le32_to_cpu(psp_hdr->ucode_feature_version)); + DRM_DEBUG("sos_offset_bytes: %u\n", + le32_to_cpu(psp_hdr->sos_offset_bytes)); + DRM_DEBUG("sos_size_bytes: %u\n", + le32_to_cpu(psp_hdr->sos_size_bytes)); + if (version_minor == 1) { + const struct psp_firmware_header_v1_1 *psp_hdr_v1_1 = + container_of(psp_hdr, struct psp_firmware_header_v1_1, v1_0); + DRM_DEBUG("toc_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_header_version)); + DRM_DEBUG("toc_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_offset_bytes)); + DRM_DEBUG("toc_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->toc_size_bytes)); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); + } + } else { + DRM_ERROR("Unknown PSP ucode version: %u.%u\n", + version_major, version_minor); + } +} + void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) { uint16_t version_major = le16_to_cpu(hdr->header_version_major); @@ -302,6 +350,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_NAVI10: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index ec4c2ea1f05a..c1fb6dc86440 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -23,6 +23,8 @@ #ifndef __AMDGPU_UCODE_H__ #define __AMDGPU_UCODE_H__ +#include "amdgpu_socbb.h" + struct common_firmware_header { uint32_t size_bytes; /* size of the entire header+image(s) in bytes */ uint32_t header_size_bytes; /* size of just the header in bytes */ @@ -49,6 +51,26 @@ struct smc_firmware_header_v1_0 { uint32_t ucode_start_addr; }; +/* version_major=2, version_minor=0 */ +struct smc_firmware_header_v2_0 { + struct smc_firmware_header_v1_0 v1_0; + uint32_t ppt_offset_bytes; /* soft pptable offset */ + uint32_t ppt_size_bytes; /* soft pptable size */ +}; + +struct smc_soft_pptable_entry { + uint32_t id; + uint32_t ppt_offset_bytes; + uint32_t ppt_size_bytes; +}; + +/* version_major=2, version_minor=1 */ +struct smc_firmware_header_v2_1 { + struct smc_firmware_header_v1_0 v1_0; + uint32_t pptable_count; + uint32_t pptable_entry_offset; +}; + /* version_major=1, version_minor=0 */ struct psp_firmware_header_v1_0 { struct common_firmware_header header; @@ -57,6 +79,17 @@ struct psp_firmware_header_v1_0 { uint32_t sos_size_bytes; }; +/* version_major=1, version_minor=1 */ +struct psp_firmware_header_v1_1 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t toc_header_version; + uint32_t toc_offset_bytes; + uint32_t toc_size_bytes; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -77,6 +110,21 @@ struct gfx_firmware_header_v1_0 { }; /* version_major=1, version_minor=0 */ +struct mes_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t mes_ucode_version; + uint32_t mes_ucode_size_bytes; + uint32_t mes_ucode_offset_bytes; + uint32_t mes_ucode_data_version; + uint32_t mes_ucode_data_size_bytes; + uint32_t mes_ucode_data_offset_bytes; + uint32_t mes_uc_start_addr_lo; + uint32_t mes_uc_start_addr_hi; + uint32_t mes_data_start_addr_lo; + uint32_t mes_data_start_addr_hi; +}; + +/* version_major=1, version_minor=0 */ struct rlc_firmware_header_v1_0 { struct common_firmware_header header; uint32_t ucode_feature_version; @@ -161,6 +209,19 @@ struct gpu_info_firmware_v1_0 { uint32_t gc_lds_size; }; +struct gpu_info_firmware_v1_1 { + struct gpu_info_firmware_v1_0 v1_0; + uint32_t num_sc_per_sh; + uint32_t num_packer_per_sc; +}; + +/* gpu info payload + * version_major=1, version_minor=1 */ +struct gpu_info_firmware_v1_2 { + struct gpu_info_firmware_v1_1 v1_1; + struct gpu_info_soc_bounding_box_v1_0 soc_bounding_box; +}; + /* version_major=1, version_minor=0 */ struct gpu_info_firmware_header_v1_0 { struct common_firmware_header header; @@ -180,7 +241,9 @@ union amdgpu_firmware_header { struct common_firmware_header common; struct mc_firmware_header_v1_0 mc; struct smc_firmware_header_v1_0 smc; + struct smc_firmware_header_v2_0 smc_v2_0; struct psp_firmware_header_v1_0 psp; + struct psp_firmware_header_v1_1 psp_v1_1; struct ta_firmware_header_v1_0 ta; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; @@ -206,6 +269,8 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC1_JT, AMDGPU_UCODE_ID_CP_MEC2, AMDGPU_UCODE_ID_CP_MEC2_JT, + AMDGPU_UCODE_ID_CP_MES, + AMDGPU_UCODE_ID_CP_MES_DATA, AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, @@ -218,6 +283,8 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_VCN, AMDGPU_UCODE_ID_DMCU_ERAM, AMDGPU_UCODE_ID_DMCU_INTV, + AMDGPU_UCODE_ID_VCN0_RAM, + AMDGPU_UCODE_ID_VCN1_RAM, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -232,6 +299,7 @@ enum amdgpu_firmware_load_type { AMDGPU_FW_LOAD_DIRECT = 0, AMDGPU_FW_LOAD_SMU, AMDGPU_FW_LOAD_PSP, + AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, }; /* conform to smu_ucode_xfer_cz.h */ @@ -284,6 +352,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); +void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9f32bf862d94..2e12eeb314a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -46,10 +46,12 @@ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" +#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_NAVI10); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -72,6 +74,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) else fw_name = FIRMWARE_RAVEN; break; + case CHIP_NAVI10: + fw_name = FIRMWARE_NAVI10; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -133,6 +141,16 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } + if (adev->vcn.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.dpg_sram_bo, + &adev->vcn.dpg_sram_gpu_addr, &adev->vcn.dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate DPG bo\n", r); + return r; + } + } + return 0; } @@ -142,6 +160,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) kvfree(adev->vcn.saved_bo); + if (adev->vcn.indirect_sram) { + amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, + &adev->vcn.dpg_sram_gpu_addr, + (void **)&adev->vcn.dpg_sram_cpu_addr); + } + amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, (void **)&adev->vcn.cpu_addr); @@ -245,7 +269,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); - if (adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -262,7 +286,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -308,17 +332,15 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); + WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; - - amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.external.scratch9); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -347,14 +369,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); - ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0); ib->ptr[1] = addr; - ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0); ib->ptr[3] = addr >> 32; - ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0); ib->ptr[5] = 0; for (i = 6; i < 16; i += 2) { - ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0); ib->ptr[i+1] = 0; } ib->length_dw = 16; @@ -629,19 +651,17 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); + WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) return r; - amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.jpeg_pitch, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -669,7 +689,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->vcn.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -715,7 +735,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); + tmp = RREG32(adev->vcn.external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index a1ee19251aae..99f14fcc1460 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -25,7 +25,7 @@ #define __AMDGPU_VCN_H__ #define AMDGPU_VCN_STACK_SIZE (128*1024) -#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) +#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 @@ -45,6 +45,11 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +#define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 +#define VCN_AON_SOC_ADDRESS_2_0 0x1f800 +#define VCN_VID_IP_ADDRESS_2_0 0x0 +#define VCN_AON_IP_ADDRESS_2_0 0x30000 + #define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ @@ -66,8 +71,55 @@ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ } while (0) +#define SOC15_DPG_MODE_OFFSET_2_0(ip, inst, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, aon_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS_2_0 + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS_2_0)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS_2_0 + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS_2_0) + \ + (VCN_VID_IP_ADDRESS_2_0)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS_2_0) + \ + (VCN_AON_IP_ADDRESS_2_0)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define RREG32_SOC15_DPG_MODE_2_0(offset, mask_en) \ + ({ \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA); \ + }) + +#define WREG32_SOC15_DPG_MODE_2_0(offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(VCN, 0, mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.dpg_sram_curr_addr++ = offset; \ + *adev->vcn.dpg_sram_curr_addr++ = value; \ + } \ + } while (0) + enum engine_status_constants { UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0 = 0xAAAA0, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, UVD_STATUS__UVD_BUSY = 0x00000004, GB_ADDR_CONFIG_DEFAULT = 0x26010011, @@ -75,6 +127,7 @@ enum engine_status_constants { UVD_STATUS__BUSY = 0x5, UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, UVD_STATUS__RBC_BUSY = 0x1, + UVD_PGFSM_STATUS_UVDJ_PWR_ON = 0, }; enum internal_dpg_state { @@ -87,6 +140,15 @@ struct dpg_pause_state { enum internal_dpg_state jpeg; }; +struct amdgpu_vcn_reg{ + unsigned data0; + unsigned data1; + unsigned cmd; + unsigned nop; + unsigned scratch9; + unsigned jpeg_pitch; +}; + struct amdgpu_vcn { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -102,8 +164,15 @@ struct amdgpu_vcn { unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; + struct amdgpu_vcn_reg internal, external; int (*pause_dpg_mode)(struct amdgpu_device *adev, struct dpg_pause_state *new_state); + + bool indirect_sram; + struct amdgpu_bo *dpg_sram_bo; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 07a7e3820b7b..59dd204498c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -390,7 +390,8 @@ static uint32_t parse_clk(char *buf, bool min) if (!ptr) break; ptr+=2; - clk = simple_strtoul(ptr, NULL, 10); + if (kstrtou32(ptr, 10, &clk)) + return 0; } while (!min); return clk * 100; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index dca25deee75c..f5107731e9c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -253,6 +253,7 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; struct amdgpu_virt { uint32_t caps; struct amdgpu_bo *csa_obj; + void *csa_cpu_addr; bool chained_ib_support; uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e44f9dd202e8..24c3c05e2fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1574,12 +1574,22 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, flags &= ~AMDGPU_PTE_EXECUTABLE; flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - flags &= ~AMDGPU_PTE_MTYPE_MASK; - flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK); + if (adev->asic_type == CHIP_NAVI10) { + flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; + flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); + } else { + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + flags |= (mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK); + } if ((mapping->flags & AMDGPU_PTE_PRT) && (adev->asic_type >= CHIP_VEGA10)) { flags |= AMDGPU_PTE_PRT; + if (adev->asic_type >= CHIP_NAVI10) { + flags |= AMDGPU_PTE_SNOOPED; + flags |= AMDGPU_PTE_LOG; + flags |= AMDGPU_PTE_SYSTEM; + } flags &= ~AMDGPU_PTE_VALID; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 91baf95212a6..489a162ca620 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -67,6 +67,8 @@ struct amdgpu_bo_list_entry; /* PDE is handled as PTE for VEGA10 */ #define AMDGPU_PDE_PTE (1ULL << 54) +#define AMDGPU_PTE_LOG (1ULL << 55) + /* PTE is handled as PDE for VEGA10 (Translate Further) */ #define AMDGPU_PTE_TF (1ULL << 56) @@ -75,8 +77,8 @@ struct amdgpu_bo_list_entry; /* For GFX9 */ -#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) -#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) +#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57) +#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL) #define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 @@ -86,7 +88,11 @@ struct amdgpu_bo_list_entry; | AMDGPU_PTE_EXECUTABLE \ | AMDGPU_PTE_READABLE \ | AMDGPU_PTE_WRITEABLE \ - | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) + | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) + +/* NAVI10 only */ +#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) +#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 8abc9b6892ea..3a9d8c15fe9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -275,7 +275,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t usage = 0, vis_usage = 0; + uint64_t vis_usage = 0, mem_bytes; unsigned i; int r; @@ -283,20 +283,34 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS || - amdgpu_vram_page_split == -1) { + /* bail out quickly if there's likely not enough VRAM for this BO */ + mem_bytes = (u64)mem->num_pages << PAGE_SHIFT; + if (atomic64_add_return(mem_bytes, &mgr->usage) > adev->gmc.mc_vram_size) { + atomic64_sub(mem_bytes, &mgr->usage); + mem->mm_node = NULL; + return 0; + } + + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { pages_per_node = ~0ul; num_nodes = 1; } else { - pages_per_node = max((uint32_t)amdgpu_vram_page_split, - mem->page_alignment); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pages_per_node = HPAGE_PMD_NR; +#else + /* default to 2MB */ + pages_per_node = (2UL << (20UL - PAGE_SHIFT)); +#endif + pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment); num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } - nodes = kvmalloc_array(num_nodes, sizeof(*nodes), + nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes), GFP_KERNEL | __GFP_ZERO); - if (!nodes) + if (!nodes) { + atomic64_sub(mem_bytes, &mgr->usage); return -ENOMEM; + } mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) @@ -316,7 +330,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) break; - usage += nodes[i].size << PAGE_SHIFT; vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; @@ -336,14 +349,12 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; - usage += nodes[i].size << PAGE_SHIFT; vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; } spin_unlock(&mgr->lock); - atomic64_add(usage, &mgr->usage); atomic64_add(vis_usage, &mgr->vis_usage); mem->mm_node = nodes; @@ -354,6 +365,7 @@ error: while (i--) drm_mm_remove_node(&nodes[i]); spin_unlock(&mgr->lock); + atomic64_sub(mem->num_pages << PAGE_SHIFT, &mgr->usage); kvfree(nodes); return r == -ENOSPC ? 0 : r; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c new file mode 100644 index 000000000000..89b32b6b81c8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -0,0 +1,101 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v2_0.h" + +#include "athub/athub_2_0_0_offset.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static void +athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void +athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + athub_v2_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_v2_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h new file mode 100644 index 000000000000..02932c1c8bab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V2_0_H__ +#define __ATHUB_V2_0_H__ + +int athub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h new file mode 100644 index 000000000000..27a2ff0a07d2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx10.h @@ -0,0 +1,975 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +static const unsigned int gfx10_SECT_CONTEXT_def_1[] = { + 0x00000000, // DB_RENDER_CONTROL + 0x00000000, // DB_COUNT_CONTROL + 0x00000000, // DB_DEPTH_VIEW + 0x00000000, // DB_RENDER_OVERRIDE + 0x00000000, // DB_RENDER_OVERRIDE2 + 0x00000000, // DB_HTILE_DATA_BASE + 0x00000000, // HOLE + 0x00000000, // DB_DEPTH_SIZE_XY + 0x00000000, // DB_DEPTH_BOUNDS_MIN + 0x00000000, // DB_DEPTH_BOUNDS_MAX + 0x00000000, // DB_STENCIL_CLEAR + 0x00000000, // DB_DEPTH_CLEAR + 0x00000000, // PA_SC_SCREEN_SCISSOR_TL + 0x40004000, // PA_SC_SCREEN_SCISSOR_BR + 0x00000000, // DB_DFSM_CONTROL + 0x00000000, // DB_DEPTH_INFO + 0x00000000, // DB_Z_INFO + 0x00000000, // DB_STENCIL_INFO + 0x00000000, // DB_Z_READ_BASE + 0x00000000, // DB_STENCIL_READ_BASE + 0x00000000, // DB_Z_WRITE_BASE + 0x00000000, // DB_STENCIL_WRITE_BASE + 0x00000000, // DB_DEPTH_SIZE + 0x00000000, // DB_DEPTH_SLICE + 0x00000000, // DB_Z_INFO2 + 0x00000000, // DB_STENCIL_INFO2 + 0x00000000, // DB_Z_READ_BASE_HI + 0x00000000, // DB_STENCIL_READ_BASE_HI + 0x00000000, // DB_Z_WRITE_BASE_HI + 0x00000000, // DB_STENCIL_WRITE_BASE_HI + 0x00000000, // DB_HTILE_DATA_BASE_HI + 0x00150055, // DB_RMI_L2_CACHE_CONTROL + 0x00000000, // TA_BC_BASE_ADDR + 0x00000000, // TA_BC_BASE_ADDR_HI + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // COHER_DEST_BASE_HI_0 + 0x00000000, // COHER_DEST_BASE_HI_1 + 0x00000000, // COHER_DEST_BASE_HI_2 + 0x00000000, // COHER_DEST_BASE_HI_3 + 0x00000000, // COHER_DEST_BASE_2 + 0x00000000, // COHER_DEST_BASE_3 + 0x00000000, // PA_SC_WINDOW_OFFSET + 0x80000000, // PA_SC_WINDOW_SCISSOR_TL + 0x40004000, // PA_SC_WINDOW_SCISSOR_BR + 0x0000ffff, // PA_SC_CLIPRECT_RULE + 0x00000000, // PA_SC_CLIPRECT_0_TL + 0x40004000, // PA_SC_CLIPRECT_0_BR + 0x00000000, // PA_SC_CLIPRECT_1_TL + 0x40004000, // PA_SC_CLIPRECT_1_BR + 0x00000000, // PA_SC_CLIPRECT_2_TL + 0x40004000, // PA_SC_CLIPRECT_2_BR + 0x00000000, // PA_SC_CLIPRECT_3_TL + 0x40004000, // PA_SC_CLIPRECT_3_BR + 0xaa99aaaa, // PA_SC_EDGERULE + 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET + 0xffffffff, // CB_TARGET_MASK + 0xffffffff, // CB_SHADER_MASK + 0x80000000, // PA_SC_GENERIC_SCISSOR_TL + 0x40004000, // PA_SC_GENERIC_SCISSOR_BR + 0x00000000, // COHER_DEST_BASE_0 + 0x00000000, // COHER_DEST_BASE_1 + 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR + 0x00000000, // PA_SC_VPORT_ZMIN_0 + 0x3f800000, // PA_SC_VPORT_ZMAX_0 + 0x00000000, // PA_SC_VPORT_ZMIN_1 + 0x3f800000, // PA_SC_VPORT_ZMAX_1 + 0x00000000, // PA_SC_VPORT_ZMIN_2 + 0x3f800000, // PA_SC_VPORT_ZMAX_2 + 0x00000000, // PA_SC_VPORT_ZMIN_3 + 0x3f800000, // PA_SC_VPORT_ZMAX_3 + 0x00000000, // PA_SC_VPORT_ZMIN_4 + 0x3f800000, // PA_SC_VPORT_ZMAX_4 + 0x00000000, // PA_SC_VPORT_ZMIN_5 + 0x3f800000, // PA_SC_VPORT_ZMAX_5 + 0x00000000, // PA_SC_VPORT_ZMIN_6 + 0x3f800000, // PA_SC_VPORT_ZMAX_6 + 0x00000000, // PA_SC_VPORT_ZMIN_7 + 0x3f800000, // PA_SC_VPORT_ZMAX_7 + 0x00000000, // PA_SC_VPORT_ZMIN_8 + 0x3f800000, // PA_SC_VPORT_ZMAX_8 + 0x00000000, // PA_SC_VPORT_ZMIN_9 + 0x3f800000, // PA_SC_VPORT_ZMAX_9 + 0x00000000, // PA_SC_VPORT_ZMIN_10 + 0x3f800000, // PA_SC_VPORT_ZMAX_10 + 0x00000000, // PA_SC_VPORT_ZMIN_11 + 0x3f800000, // PA_SC_VPORT_ZMAX_11 + 0x00000000, // PA_SC_VPORT_ZMIN_12 + 0x3f800000, // PA_SC_VPORT_ZMAX_12 + 0x00000000, // PA_SC_VPORT_ZMIN_13 + 0x3f800000, // PA_SC_VPORT_ZMAX_13 + 0x00000000, // PA_SC_VPORT_ZMIN_14 + 0x3f800000, // PA_SC_VPORT_ZMAX_14 + 0x00000000, // PA_SC_VPORT_ZMIN_15 + 0x3f800000, // PA_SC_VPORT_ZMAX_15 + 0x00000000, // PA_SC_RASTER_CONFIG + 0x00000000, // PA_SC_RASTER_CONFIG_1 + 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL +}; +static const unsigned int gfx10_SECT_CONTEXT_def_2[] = { + 0x00000000, // CP_PERFMON_CNTX_CNTL + 0x00000000, // CP_RINGID + 0x00000000, // CP_VMID + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SC_RIGHT_VERT_GRID + 0x00000000, // PA_SC_LEFT_VERT_GRID + 0x00000000, // PA_SC_HORIZ_GRID + 0x00000000, // HOLE + 0x00000000, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0xffffffff, // VGT_MAX_VTX_INDX + 0x00000000, // VGT_MIN_VTX_INDX + 0x00000000, // VGT_INDX_OFFSET + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX + 0x00550055, // CB_RMI_GL2_CACHE_CONTROL + 0x00000000, // CB_BLEND_RED + 0x00000000, // CB_BLEND_GREEN + 0x00000000, // CB_BLEND_BLUE + 0x00000000, // CB_BLEND_ALPHA + 0x00000000, // CB_DCC_CONTROL + 0x00000000, // CB_COVERAGE_OUT_CONTROL + 0x00000000, // DB_STENCIL_CONTROL + 0x01000000, // DB_STENCILREFMASK + 0x01000000, // DB_STENCILREFMASK_BF + 0, // HOLE + 0x00000000, // PA_CL_VPORT_XSCALE + 0x00000000, // PA_CL_VPORT_XOFFSET + 0x00000000, // PA_CL_VPORT_YSCALE + 0x00000000, // PA_CL_VPORT_YOFFSET + 0x00000000, // PA_CL_VPORT_ZSCALE + 0x00000000, // PA_CL_VPORT_ZOFFSET + 0x00000000, // PA_CL_VPORT_XSCALE_1 + 0x00000000, // PA_CL_VPORT_XOFFSET_1 + 0x00000000, // PA_CL_VPORT_YSCALE_1 + 0x00000000, // PA_CL_VPORT_YOFFSET_1 + 0x00000000, // PA_CL_VPORT_ZSCALE_1 + 0x00000000, // PA_CL_VPORT_ZOFFSET_1 + 0x00000000, // PA_CL_VPORT_XSCALE_2 + 0x00000000, // PA_CL_VPORT_XOFFSET_2 + 0x00000000, // PA_CL_VPORT_YSCALE_2 + 0x00000000, // PA_CL_VPORT_YOFFSET_2 + 0x00000000, // PA_CL_VPORT_ZSCALE_2 + 0x00000000, // PA_CL_VPORT_ZOFFSET_2 + 0x00000000, // PA_CL_VPORT_XSCALE_3 + 0x00000000, // PA_CL_VPORT_XOFFSET_3 + 0x00000000, // PA_CL_VPORT_YSCALE_3 + 0x00000000, // PA_CL_VPORT_YOFFSET_3 + 0x00000000, // PA_CL_VPORT_ZSCALE_3 + 0x00000000, // PA_CL_VPORT_ZOFFSET_3 + 0x00000000, // PA_CL_VPORT_XSCALE_4 + 0x00000000, // PA_CL_VPORT_XOFFSET_4 + 0x00000000, // PA_CL_VPORT_YSCALE_4 + 0x00000000, // PA_CL_VPORT_YOFFSET_4 + 0x00000000, // PA_CL_VPORT_ZSCALE_4 + 0x00000000, // PA_CL_VPORT_ZOFFSET_4 + 0x00000000, // PA_CL_VPORT_XSCALE_5 + 0x00000000, // PA_CL_VPORT_XOFFSET_5 + 0x00000000, // PA_CL_VPORT_YSCALE_5 + 0x00000000, // PA_CL_VPORT_YOFFSET_5 + 0x00000000, // PA_CL_VPORT_ZSCALE_5 + 0x00000000, // PA_CL_VPORT_ZOFFSET_5 + 0x00000000, // PA_CL_VPORT_XSCALE_6 + 0x00000000, // PA_CL_VPORT_XOFFSET_6 + 0x00000000, // PA_CL_VPORT_YSCALE_6 + 0x00000000, // PA_CL_VPORT_YOFFSET_6 + 0x00000000, // PA_CL_VPORT_ZSCALE_6 + 0x00000000, // PA_CL_VPORT_ZOFFSET_6 + 0x00000000, // PA_CL_VPORT_XSCALE_7 + 0x00000000, // PA_CL_VPORT_XOFFSET_7 + 0x00000000, // PA_CL_VPORT_YSCALE_7 + 0x00000000, // PA_CL_VPORT_YOFFSET_7 + 0x00000000, // PA_CL_VPORT_ZSCALE_7 + 0x00000000, // PA_CL_VPORT_ZOFFSET_7 + 0x00000000, // PA_CL_VPORT_XSCALE_8 + 0x00000000, // PA_CL_VPORT_XOFFSET_8 + 0x00000000, // PA_CL_VPORT_YSCALE_8 + 0x00000000, // PA_CL_VPORT_YOFFSET_8 + 0x00000000, // PA_CL_VPORT_ZSCALE_8 + 0x00000000, // PA_CL_VPORT_ZOFFSET_8 + 0x00000000, // PA_CL_VPORT_XSCALE_9 + 0x00000000, // PA_CL_VPORT_XOFFSET_9 + 0x00000000, // PA_CL_VPORT_YSCALE_9 + 0x00000000, // PA_CL_VPORT_YOFFSET_9 + 0x00000000, // PA_CL_VPORT_ZSCALE_9 + 0x00000000, // PA_CL_VPORT_ZOFFSET_9 + 0x00000000, // PA_CL_VPORT_XSCALE_10 + 0x00000000, // PA_CL_VPORT_XOFFSET_10 + 0x00000000, // PA_CL_VPORT_YSCALE_10 + 0x00000000, // PA_CL_VPORT_YOFFSET_10 + 0x00000000, // PA_CL_VPORT_ZSCALE_10 + 0x00000000, // PA_CL_VPORT_ZOFFSET_10 + 0x00000000, // PA_CL_VPORT_XSCALE_11 + 0x00000000, // PA_CL_VPORT_XOFFSET_11 + 0x00000000, // PA_CL_VPORT_YSCALE_11 + 0x00000000, // PA_CL_VPORT_YOFFSET_11 + 0x00000000, // PA_CL_VPORT_ZSCALE_11 + 0x00000000, // PA_CL_VPORT_ZOFFSET_11 + 0x00000000, // PA_CL_VPORT_XSCALE_12 + 0x00000000, // PA_CL_VPORT_XOFFSET_12 + 0x00000000, // PA_CL_VPORT_YSCALE_12 + 0x00000000, // PA_CL_VPORT_YOFFSET_12 + 0x00000000, // PA_CL_VPORT_ZSCALE_12 + 0x00000000, // PA_CL_VPORT_ZOFFSET_12 + 0x00000000, // PA_CL_VPORT_XSCALE_13 + 0x00000000, // PA_CL_VPORT_XOFFSET_13 + 0x00000000, // PA_CL_VPORT_YSCALE_13 + 0x00000000, // PA_CL_VPORT_YOFFSET_13 + 0x00000000, // PA_CL_VPORT_ZSCALE_13 + 0x00000000, // PA_CL_VPORT_ZOFFSET_13 + 0x00000000, // PA_CL_VPORT_XSCALE_14 + 0x00000000, // PA_CL_VPORT_XOFFSET_14 + 0x00000000, // PA_CL_VPORT_YSCALE_14 + 0x00000000, // PA_CL_VPORT_YOFFSET_14 + 0x00000000, // PA_CL_VPORT_ZSCALE_14 + 0x00000000, // PA_CL_VPORT_ZOFFSET_14 + 0x00000000, // PA_CL_VPORT_XSCALE_15 + 0x00000000, // PA_CL_VPORT_XOFFSET_15 + 0x00000000, // PA_CL_VPORT_YSCALE_15 + 0x00000000, // PA_CL_VPORT_YOFFSET_15 + 0x00000000, // PA_CL_VPORT_ZSCALE_15 + 0x00000000, // PA_CL_VPORT_ZOFFSET_15 + 0x00000000, // PA_CL_UCP_0_X + 0x00000000, // PA_CL_UCP_0_Y + 0x00000000, // PA_CL_UCP_0_Z + 0x00000000, // PA_CL_UCP_0_W + 0x00000000, // PA_CL_UCP_1_X + 0x00000000, // PA_CL_UCP_1_Y + 0x00000000, // PA_CL_UCP_1_Z + 0x00000000, // PA_CL_UCP_1_W + 0x00000000, // PA_CL_UCP_2_X + 0x00000000, // PA_CL_UCP_2_Y + 0x00000000, // PA_CL_UCP_2_Z + 0x00000000, // PA_CL_UCP_2_W + 0x00000000, // PA_CL_UCP_3_X + 0x00000000, // PA_CL_UCP_3_Y + 0x00000000, // PA_CL_UCP_3_Z + 0x00000000, // PA_CL_UCP_3_W + 0x00000000, // PA_CL_UCP_4_X + 0x00000000, // PA_CL_UCP_4_Y + 0x00000000, // PA_CL_UCP_4_Z + 0x00000000, // PA_CL_UCP_4_W + 0x00000000, // PA_CL_UCP_5_X + 0x00000000, // PA_CL_UCP_5_Y + 0x00000000, // PA_CL_UCP_5_Z + 0x00000000, // PA_CL_UCP_5_W + 0x00000000, // PA_CL_PROG_NEAR_CLIP_Z + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_CNTL_0 + 0x00000000, // SPI_PS_INPUT_CNTL_1 + 0x00000000, // SPI_PS_INPUT_CNTL_2 + 0x00000000, // SPI_PS_INPUT_CNTL_3 + 0x00000000, // SPI_PS_INPUT_CNTL_4 + 0x00000000, // SPI_PS_INPUT_CNTL_5 + 0x00000000, // SPI_PS_INPUT_CNTL_6 + 0x00000000, // SPI_PS_INPUT_CNTL_7 + 0x00000000, // SPI_PS_INPUT_CNTL_8 + 0x00000000, // SPI_PS_INPUT_CNTL_9 + 0x00000000, // SPI_PS_INPUT_CNTL_10 + 0x00000000, // SPI_PS_INPUT_CNTL_11 + 0x00000000, // SPI_PS_INPUT_CNTL_12 + 0x00000000, // SPI_PS_INPUT_CNTL_13 + 0x00000000, // SPI_PS_INPUT_CNTL_14 + 0x00000000, // SPI_PS_INPUT_CNTL_15 + 0x00000000, // SPI_PS_INPUT_CNTL_16 + 0x00000000, // SPI_PS_INPUT_CNTL_17 + 0x00000000, // SPI_PS_INPUT_CNTL_18 + 0x00000000, // SPI_PS_INPUT_CNTL_19 + 0x00000000, // SPI_PS_INPUT_CNTL_20 + 0x00000000, // SPI_PS_INPUT_CNTL_21 + 0x00000000, // SPI_PS_INPUT_CNTL_22 + 0x00000000, // SPI_PS_INPUT_CNTL_23 + 0x00000000, // SPI_PS_INPUT_CNTL_24 + 0x00000000, // SPI_PS_INPUT_CNTL_25 + 0x00000000, // SPI_PS_INPUT_CNTL_26 + 0x00000000, // SPI_PS_INPUT_CNTL_27 + 0x00000000, // SPI_PS_INPUT_CNTL_28 + 0x00000000, // SPI_PS_INPUT_CNTL_29 + 0x00000000, // SPI_PS_INPUT_CNTL_30 + 0x00000000, // SPI_PS_INPUT_CNTL_31 + 0x00000000, // SPI_VS_OUT_CONFIG + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_ENA + 0x00000000, // SPI_PS_INPUT_ADDR + 0x00000000, // SPI_INTERP_CONTROL_0 + 0x00000002, // SPI_PS_IN_CONTROL + 0, // HOLE + 0x00000000, // SPI_BARYC_CNTL + 0, // HOLE + 0x00000000, // SPI_TMPRING_SIZE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_SHADER_IDX_FORMAT + 0x00000000, // SPI_SHADER_POS_FORMAT + 0x00000000, // SPI_SHADER_Z_FORMAT + 0x00000000, // SPI_SHADER_COL_FORMAT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SX_PS_DOWNCONVERT + 0x00000000, // SX_BLEND_OPT_EPSILON + 0x00000000, // SX_BLEND_OPT_CONTROL + 0x00000000, // SX_MRT0_BLEND_OPT + 0x00000000, // SX_MRT1_BLEND_OPT + 0x00000000, // SX_MRT2_BLEND_OPT + 0x00000000, // SX_MRT3_BLEND_OPT + 0x00000000, // SX_MRT4_BLEND_OPT + 0x00000000, // SX_MRT5_BLEND_OPT + 0x00000000, // SX_MRT6_BLEND_OPT + 0x00000000, // SX_MRT7_BLEND_OPT + 0x00000000, // CB_BLEND0_CONTROL + 0x00000000, // CB_BLEND1_CONTROL + 0x00000000, // CB_BLEND2_CONTROL + 0x00000000, // CB_BLEND3_CONTROL + 0x00000000, // CB_BLEND4_CONTROL + 0x00000000, // CB_BLEND5_CONTROL + 0x00000000, // CB_BLEND6_CONTROL + 0x00000000, // CB_BLEND7_CONTROL +}; +static const unsigned int gfx10_SECT_CONTEXT_def_3[] = { + 0x00000000, // PA_CL_POINT_X_RAD + 0x00000000, // PA_CL_POINT_Y_RAD + 0x00000000, // PA_CL_POINT_SIZE + 0x00000000, // PA_CL_POINT_CULL_RAD +}; +static const unsigned int gfx10_SECT_CONTEXT_def_4[] = { + 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP + 0x00000000, // DB_DEPTH_CONTROL + 0x00000000, // DB_EQAA + 0x00000000, // CB_COLOR_CONTROL + 0x00000000, // DB_SHADER_CONTROL + 0x00090000, // PA_CL_CLIP_CNTL + 0x00000004, // PA_SU_SC_MODE_CNTL + 0x00000000, // PA_CL_VTE_CNTL + 0x00000000, // PA_CL_VS_OUT_CNTL + 0x00000000, // PA_CL_NANINF_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_SCALE + 0x00000000, // PA_SU_PRIM_FILTER_CNTL + 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL + 0x00000000, // PA_CL_OBJPRIM_ID_CNTL + 0x00000000, // PA_CL_NGG_CNTL + 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SU_POINT_SIZE + 0x00000000, // PA_SU_POINT_MINMAX + 0x00000000, // PA_SU_LINE_CNTL + 0x00000000, // PA_SC_LINE_STIPPLE + 0x00000000, // VGT_OUTPUT_PATH_CNTL + 0x00000000, // VGT_HOS_CNTL + 0x00000000, // VGT_HOS_MAX_TESS_LEVEL + 0x00000000, // VGT_HOS_MIN_TESS_LEVEL + 0x00000000, // VGT_HOS_REUSE_DEPTH + 0x00000000, // VGT_GROUP_PRIM_TYPE + 0x00000000, // VGT_GROUP_FIRST_DECR + 0x00000000, // VGT_GROUP_DECR + 0x00000000, // VGT_GROUP_VECT_0_CNTL + 0x00000000, // VGT_GROUP_VECT_1_CNTL + 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL + 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL + 0x00000000, // VGT_GS_MODE + 0x00000000, // VGT_GS_ONCHIP_CNTL + 0x00000000, // PA_SC_MODE_CNTL_0 + 0x00000000, // PA_SC_MODE_CNTL_1 + 0x00000000, // VGT_ENHANCE + 0x00000100, // VGT_GS_PER_ES + 0x00000080, // VGT_ES_PER_GS + 0x00000002, // VGT_GS_PER_VS + 0x00000000, // VGT_GSVS_RING_OFFSET_1 + 0x00000000, // VGT_GSVS_RING_OFFSET_2 + 0x00000000, // VGT_GSVS_RING_OFFSET_3 + 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0x00000000, // IA_ENHANCE +}; +static const unsigned int gfx10_SECT_CONTEXT_def_5[] = { + 0x00000000, // WD_ENHANCE + 0x00000000, // VGT_PRIMITIVEID_EN +}; +static const unsigned int gfx10_SECT_CONTEXT_def_6[] = { + 0x00000000, // VGT_PRIMITIVEID_RESET +}; +static const unsigned int gfx10_SECT_CONTEXT_def_7[] = { + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN + 0x00000000, // VGT_DRAW_PAYLOAD_CNTL + 0x00000000, // HOLE + 0x00000000, // VGT_INSTANCE_STEP_RATE_0 + 0x00000000, // VGT_INSTANCE_STEP_RATE_1 + 0x000000ff, // IA_MULTI_VGT_PARAM + 0x00000000, // VGT_ESGS_RING_ITEMSIZE + 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0x00000000, // VGT_REUSE_OFF + 0x00000000, // VGT_VTX_CNT_EN + 0x00000000, // DB_HTILE_SURFACE + 0x00000000, // DB_SRESULTS_COMPARE_STATE0 + 0x00000000, // DB_SRESULTS_COMPARE_STATE1 + 0x00000000, // DB_PRELOAD_CONTROL + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE + 0, // HOLE + 0x00000000, // VGT_GS_MAX_VERT_OUT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_TESS_DISTRIBUTION + 0x00000000, // VGT_SHADER_STAGES_EN + 0x00000000, // VGT_LS_HS_CONFIG + 0x00000000, // VGT_GS_VERT_ITEMSIZE + 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0x00000000, // VGT_TF_PARAM + 0x00000000, // DB_ALPHA_TO_MASK + 0x00000000, // VGT_DISPATCH_DRAW_INDEX + 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL + 0x00000000, // PA_SU_POLY_OFFSET_CLAMP + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET + 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET + 0x00000000, // VGT_GS_INSTANCE_CNT + 0x00000000, // VGT_STRMOUT_CONFIG + 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG +}; +static const unsigned int gfx10_SECT_CONTEXT_def_8[] = { + 0x00000000, // PA_SC_CENTROID_PRIORITY_0 + 0x00000000, // PA_SC_CENTROID_PRIORITY_1 + 0x00001000, // PA_SC_LINE_CNTL + 0x00000000, // PA_SC_AA_CONFIG + 0x00000005, // PA_SU_VTX_CNTL + 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ + 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ + 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ + 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 + 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0 + 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1 + 0x00000000, // PA_SC_SHADER_CONTROL + 0x00000003, // PA_SC_BINNER_CNTL_0 + 0x00000000, // PA_SC_BINNER_CNTL_1 + 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL + 0x00000000, // PA_SC_NGG_MODE_CNTL + 0, // HOLE + 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL + 0x00000020, // VGT_OUT_DEALLOC_CNTL + 0x00000000, // CB_COLOR0_BASE + 0x00000000, // CB_COLOR0_PITCH + 0x00000000, // CB_COLOR0_SLICE + 0x00000000, // CB_COLOR0_VIEW + 0x00000000, // CB_COLOR0_INFO + 0x00000000, // CB_COLOR0_ATTRIB + 0x00000000, // CB_COLOR0_DCC_CONTROL + 0x00000000, // CB_COLOR0_CMASK + 0x00000000, // CB_COLOR0_CMASK_SLICE + 0x00000000, // CB_COLOR0_FMASK + 0x00000000, // CB_COLOR0_FMASK_SLICE + 0x00000000, // CB_COLOR0_CLEAR_WORD0 + 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0x00000000, // CB_COLOR0_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR1_BASE + 0x00000000, // CB_COLOR1_PITCH + 0x00000000, // CB_COLOR1_SLICE + 0x00000000, // CB_COLOR1_VIEW + 0x00000000, // CB_COLOR1_INFO + 0x00000000, // CB_COLOR1_ATTRIB + 0x00000000, // CB_COLOR1_DCC_CONTROL + 0x00000000, // CB_COLOR1_CMASK + 0x00000000, // CB_COLOR1_CMASK_SLICE + 0x00000000, // CB_COLOR1_FMASK + 0x00000000, // CB_COLOR1_FMASK_SLICE + 0x00000000, // CB_COLOR1_CLEAR_WORD0 + 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0x00000000, // CB_COLOR1_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR2_BASE + 0x00000000, // CB_COLOR2_PITCH + 0x00000000, // CB_COLOR2_SLICE + 0x00000000, // CB_COLOR2_VIEW + 0x00000000, // CB_COLOR2_INFO + 0x00000000, // CB_COLOR2_ATTRIB + 0x00000000, // CB_COLOR2_DCC_CONTROL + 0x00000000, // CB_COLOR2_CMASK + 0x00000000, // CB_COLOR2_CMASK_SLICE + 0x00000000, // CB_COLOR2_FMASK + 0x00000000, // CB_COLOR2_FMASK_SLICE + 0x00000000, // CB_COLOR2_CLEAR_WORD0 + 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0x00000000, // CB_COLOR2_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR3_BASE + 0x00000000, // CB_COLOR3_PITCH + 0x00000000, // CB_COLOR3_SLICE + 0x00000000, // CB_COLOR3_VIEW + 0x00000000, // CB_COLOR3_INFO + 0x00000000, // CB_COLOR3_ATTRIB + 0x00000000, // CB_COLOR3_DCC_CONTROL + 0x00000000, // CB_COLOR3_CMASK + 0x00000000, // CB_COLOR3_CMASK_SLICE + 0x00000000, // CB_COLOR3_FMASK + 0x00000000, // CB_COLOR3_FMASK_SLICE + 0x00000000, // CB_COLOR3_CLEAR_WORD0 + 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0x00000000, // CB_COLOR3_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR4_BASE + 0x00000000, // CB_COLOR4_PITCH + 0x00000000, // CB_COLOR4_SLICE + 0x00000000, // CB_COLOR4_VIEW + 0x00000000, // CB_COLOR4_INFO + 0x00000000, // CB_COLOR4_ATTRIB + 0x00000000, // CB_COLOR4_DCC_CONTROL + 0x00000000, // CB_COLOR4_CMASK + 0x00000000, // CB_COLOR4_CMASK_SLICE + 0x00000000, // CB_COLOR4_FMASK + 0x00000000, // CB_COLOR4_FMASK_SLICE + 0x00000000, // CB_COLOR4_CLEAR_WORD0 + 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0x00000000, // CB_COLOR4_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR5_BASE + 0x00000000, // CB_COLOR5_PITCH + 0x00000000, // CB_COLOR5_SLICE + 0x00000000, // CB_COLOR5_VIEW + 0x00000000, // CB_COLOR5_INFO + 0x00000000, // CB_COLOR5_ATTRIB + 0x00000000, // CB_COLOR5_DCC_CONTROL + 0x00000000, // CB_COLOR5_CMASK + 0x00000000, // CB_COLOR5_CMASK_SLICE + 0x00000000, // CB_COLOR5_FMASK + 0x00000000, // CB_COLOR5_FMASK_SLICE + 0x00000000, // CB_COLOR5_CLEAR_WORD0 + 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0x00000000, // CB_COLOR5_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR6_BASE + 0x00000000, // CB_COLOR6_PITCH + 0x00000000, // CB_COLOR6_SLICE + 0x00000000, // CB_COLOR6_VIEW + 0x00000000, // CB_COLOR6_INFO + 0x00000000, // CB_COLOR6_ATTRIB + 0x00000000, // CB_COLOR6_DCC_CONTROL + 0x00000000, // CB_COLOR6_CMASK + 0x00000000, // CB_COLOR6_CMASK_SLICE + 0x00000000, // CB_COLOR6_FMASK + 0x00000000, // CB_COLOR6_FMASK_SLICE + 0x00000000, // CB_COLOR6_CLEAR_WORD0 + 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0x00000000, // CB_COLOR6_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR7_BASE + 0x00000000, // CB_COLOR7_PITCH + 0x00000000, // CB_COLOR7_SLICE + 0x00000000, // CB_COLOR7_VIEW + 0x00000000, // CB_COLOR7_INFO + 0x00000000, // CB_COLOR7_ATTRIB + 0x00000000, // CB_COLOR7_DCC_CONTROL + 0x00000000, // CB_COLOR7_CMASK + 0x00000000, // CB_COLOR7_CMASK_SLICE + 0x00000000, // CB_COLOR7_FMASK + 0x00000000, // CB_COLOR7_FMASK_SLICE + 0x00000000, // CB_COLOR7_CLEAR_WORD0 + 0x00000000, // CB_COLOR7_CLEAR_WORD1 + 0x00000000, // CB_COLOR7_DCC_BASE + 0, // HOLE + 0x00000000, // CB_COLOR0_BASE_EXT + 0x00000000, // CB_COLOR1_BASE_EXT + 0x00000000, // CB_COLOR2_BASE_EXT + 0x00000000, // CB_COLOR3_BASE_EXT + 0x00000000, // CB_COLOR4_BASE_EXT + 0x00000000, // CB_COLOR5_BASE_EXT + 0x00000000, // CB_COLOR6_BASE_EXT + 0x00000000, // CB_COLOR7_BASE_EXT + 0x00000000, // CB_COLOR0_CMASK_BASE_EXT + 0x00000000, // CB_COLOR1_CMASK_BASE_EXT + 0x00000000, // CB_COLOR2_CMASK_BASE_EXT + 0x00000000, // CB_COLOR3_CMASK_BASE_EXT + 0x00000000, // CB_COLOR4_CMASK_BASE_EXT + 0x00000000, // CB_COLOR5_CMASK_BASE_EXT + 0x00000000, // CB_COLOR6_CMASK_BASE_EXT + 0x00000000, // CB_COLOR7_CMASK_BASE_EXT + 0x00000000, // CB_COLOR0_FMASK_BASE_EXT + 0x00000000, // CB_COLOR1_FMASK_BASE_EXT + 0x00000000, // CB_COLOR2_FMASK_BASE_EXT + 0x00000000, // CB_COLOR3_FMASK_BASE_EXT + 0x00000000, // CB_COLOR4_FMASK_BASE_EXT + 0x00000000, // CB_COLOR5_FMASK_BASE_EXT + 0x00000000, // CB_COLOR6_FMASK_BASE_EXT + 0x00000000, // CB_COLOR7_FMASK_BASE_EXT + 0x00000000, // CB_COLOR0_DCC_BASE_EXT + 0x00000000, // CB_COLOR1_DCC_BASE_EXT + 0x00000000, // CB_COLOR2_DCC_BASE_EXT + 0x00000000, // CB_COLOR3_DCC_BASE_EXT + 0x00000000, // CB_COLOR4_DCC_BASE_EXT + 0x00000000, // CB_COLOR5_DCC_BASE_EXT + 0x00000000, // CB_COLOR6_DCC_BASE_EXT + 0x00000000, // CB_COLOR7_DCC_BASE_EXT + 0x00000000, // CB_COLOR0_ATTRIB2 + 0x00000000, // CB_COLOR1_ATTRIB2 + 0x00000000, // CB_COLOR2_ATTRIB2 + 0x00000000, // CB_COLOR3_ATTRIB2 + 0x00000000, // CB_COLOR4_ATTRIB2 + 0x00000000, // CB_COLOR5_ATTRIB2 + 0x00000000, // CB_COLOR6_ATTRIB2 + 0x00000000, // CB_COLOR7_ATTRIB2 + 0x00000000, // CB_COLOR0_ATTRIB3 + 0x00000000, // CB_COLOR1_ATTRIB3 + 0x00000000, // CB_COLOR2_ATTRIB3 + 0x00000000, // CB_COLOR3_ATTRIB3 + 0x00000000, // CB_COLOR4_ATTRIB3 + 0x00000000, // CB_COLOR5_ATTRIB3 + 0x00000000, // CB_COLOR6_ATTRIB3 + 0x00000000, // CB_COLOR7_ATTRIB3 +}; +static const struct cs_extent_def gfx10_SECT_CONTEXT_defs[] = { + {gfx10_SECT_CONTEXT_def_1, 0x0000a000, 215 }, + {gfx10_SECT_CONTEXT_def_2, 0x0000a0d8, 272 }, + {gfx10_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, + {gfx10_SECT_CONTEXT_def_4, 0x0000a1ff, 158 }, + {gfx10_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, + {gfx10_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, + {gfx10_SECT_CONTEXT_def_7, 0x0000a2a5, 66 }, + {gfx10_SECT_CONTEXT_def_8, 0x0000a2f5, 203 }, + { 0, 0, 0 } +}; +static const struct cs_section_def gfx10_cs_data[] = { + { gfx10_SECT_CONTEXT_defs, SECT_CONTEXT }, + { 0, SECT_NONE } +}; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 3026298da7eb..3cc0a16649f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -457,6 +457,7 @@ static int dce_virtual_hw_init(void *handle) case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_NAVI10: break; default: DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 9935371db7ce..844c03868248 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -29,7 +29,7 @@ static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; -static void df_v1_7_init (struct amdgpu_device *adev) +static void df_v1_7_sw_init(struct amdgpu_device *adev) { } @@ -110,7 +110,7 @@ static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, } const struct amdgpu_df_funcs df_v1_7_funcs = { - .init = df_v1_7_init, + .sw_init = df_v1_7_sw_init, .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, .get_fb_channel_number = df_v1_7_get_fb_channel_number, .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 8c09bf994acd..ef6e91f9f51c 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -30,8 +30,104 @@ static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 32, 0, 0, 0, 2, 4, 8}; -static void df_v3_6_init(struct amdgpu_device *adev) +/* init df format attrs */ +AMDGPU_PMU_ATTR(event, "config:0-7"); +AMDGPU_PMU_ATTR(instance, "config:8-15"); +AMDGPU_PMU_ATTR(umask, "config:16-23"); + +/* df format attributes */ +static struct attribute *df_v3_6_format_attrs[] = { + &pmu_attr_event.attr, + &pmu_attr_instance.attr, + &pmu_attr_umask.attr, + NULL +}; + +/* df format attribute group */ +static struct attribute_group df_v3_6_format_attr_group = { + .name = "format", + .attrs = df_v3_6_format_attrs, +}; + +/* df event attrs */ +AMDGPU_PMU_ATTR(cake0_pcsout_txdata, + "event=0x7,instance=0x46,umask=0x2"); +AMDGPU_PMU_ATTR(cake1_pcsout_txdata, + "event=0x7,instance=0x47,umask=0x2"); +AMDGPU_PMU_ATTR(cake0_pcsout_txmeta, + "event=0x7,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_pcsout_txmeta, + "event=0x7,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc, + "event=0xb,instance=0x46,umask=0x4"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc, + "event=0xb,instance=0x47,umask=0x4"); +AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc, + "event=0xb,instance=0x46,umask=0x8"); +AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc, + "event=0xb,instance=0x47,umask=0x8"); + +/* df event attributes */ +static struct attribute *df_v3_6_event_attrs[] = { + &pmu_attr_cake0_pcsout_txdata.attr, + &pmu_attr_cake1_pcsout_txdata.attr, + &pmu_attr_cake0_pcsout_txmeta.attr, + &pmu_attr_cake1_pcsout_txmeta.attr, + &pmu_attr_cake0_ftiinstat_reqalloc.attr, + &pmu_attr_cake1_ftiinstat_reqalloc.attr, + &pmu_attr_cake0_ftiinstat_rspalloc.attr, + &pmu_attr_cake1_ftiinstat_rspalloc.attr, + NULL +}; + +/* df event attribute group */ +static struct attribute_group df_v3_6_event_attr_group = { + .name = "events", + .attrs = df_v3_6_event_attrs +}; + +/* df event attr groups */ +const struct attribute_group *df_v3_6_attr_groups[] = { + &df_v3_6_format_attr_group, + &df_v3_6_event_attr_group, + NULL +}; + +/* get the number of df counters available */ +static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, + struct device_attribute *attr, + char *buf) { + struct amdgpu_device *adev; + struct drm_device *ddev; + int i, count; + + ddev = dev_get_drvdata(dev); + adev = ddev->dev_private; + count = 0; + + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0) + count++; + } + + return snprintf(buf, PAGE_SIZE, "%i\n", count); +} + +/* device attr for available perfmon counters */ +static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL); + +/* init perfmons */ +static void df_v3_6_sw_init(struct amdgpu_device *adev) +{ + int i, ret; + + ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail); + if (ret) + DRM_ERROR("failed to create file for available df counters\n"); + + for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++) + adev->df_perfmon_config_assign_mask[i] = 0; } static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, @@ -105,28 +201,19 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, *flags |= AMD_CG_SUPPORT_DF_MGCG; } -/* hold counter assignment per gpu struct */ -struct df_v3_6_event_mask { - struct amdgpu_device gpu; - uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS]; -}; - /* get assigned df perfmon ctr as int */ -static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, - uint64_t config, - int *counter) +static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, + uint64_t config) { - struct df_v3_6_event_mask *mask; int i; - mask = container_of(adev, struct df_v3_6_event_mask, gpu); - - for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { - if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) { - *counter = i; - return; - } + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if ((config & 0x0FFFFFFUL) == + adev->df_perfmon_config_assign_mask[i]) + return i; } + + return -EINVAL; } /* get address based on counter assignment */ @@ -136,10 +223,7 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, uint32_t *lo_base_addr, uint32_t *hi_base_addr) { - - int target_cntr = -1; - - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr < 0) return; @@ -177,40 +261,38 @@ static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, } /* get control counter settings i.e. address and values to set */ -static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, +static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint64_t config, uint32_t *lo_base_addr, uint32_t *hi_base_addr, uint32_t *lo_val, uint32_t *hi_val) { - - uint32_t eventsel, instance, unitmask; - uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0; - df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); - if (lo_val == NULL || hi_val == NULL) - return; - if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { - DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x", + DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x", *lo_base_addr, *hi_base_addr); - return; + return -ENXIO; + } + + if (lo_val && hi_val) { + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); + + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; + + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; + *hi_val = (instance_76 << 29) | instance_5432; } - eventsel = GET_EVENT(config); - instance = GET_INSTANCE(config); - unitmask = GET_UNITMASK(config); - - es_5_0 = eventsel & 0x3FUL; - es_13_6 = instance; - es_13_0 = (es_13_6 << 6) + es_5_0; - es_13_12 = (es_13_0 & 0x03000UL) >> 12; - es_11_8 = (es_13_0 & 0x0F00UL) >> 8; - es_7_0 = es_13_0 & 0x0FFUL; - *lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8); - *hi_val = (es_11_8 | ((es_13_12)<<(29))); + return 0; } /* assign df performance counters for read */ @@ -218,26 +300,21 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, uint64_t config, int *is_assigned) { - - struct df_v3_6_event_mask *mask; int i, target_cntr; - target_cntr = -1; - *is_assigned = 0; - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); + target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr >= 0) { *is_assigned = 1; return 0; } - mask = container_of(adev, struct df_v3_6_event_mask, gpu); - - for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) { - if (mask->config_assign_mask[i] == 0ULL) { - mask->config_assign_mask[i] = config & 0x0FFFFFFUL; + for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { + if (adev->df_perfmon_config_assign_mask[i] == 0U) { + adev->df_perfmon_config_assign_mask[i] = + config & 0x0FFFFFFUL; return 0; } } @@ -249,66 +326,17 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, uint64_t config) { - - struct df_v3_6_event_mask *mask; - int target_cntr; - - target_cntr = -1; - - df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr); - - mask = container_of(adev, struct df_v3_6_event_mask, gpu); + int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); if (target_cntr >= 0) - mask->config_assign_mask[target_cntr] = 0ULL; - + adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL; } -/* - * get xgmi link counters via programmable data fabric (df) counters (max 4) - * using cake tx event. - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @count -> counters to pass - * - */ - -static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - uint64_t *count) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - uint64_t config; - - config = GET_INSTANCE_CONFIG(instance); - - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, - &hi_base_addr); - - if ((lo_base_addr == 0) || (hi_base_addr == 0)) - return; - - lo_val = RREG32_PCIE(lo_base_addr); - hi_val = RREG32_PCIE(hi_base_addr); - *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); -} - -/* - * reset xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * - */ -static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, - int instance) +static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, + uint64_t config) { uint32_t lo_base_addr, hi_base_addr; - uint64_t config; - - config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16); df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, &hi_base_addr); @@ -320,185 +348,106 @@ static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev, WREG32_PCIE(hi_base_addr, 0UL); } -/* - * add xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * - */ -static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev, - int instance) +static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, + uint64_t config) { uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - uint64_t config; int ret, is_assigned; - if (instance < 0 || instance > 1) - return -EINVAL; - - config = GET_INSTANCE_CONFIG(instance); - ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); if (ret || is_assigned) return ret; - df_v3_6_pmc_get_ctrl_settings(adev, + ret = df_v3_6_pmc_get_ctrl_settings(adev, config, &lo_base_addr, &hi_base_addr, &lo_val, &hi_val); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, lo_base_addr, hi_base_addr, lo_val, hi_val); + WREG32_PCIE(lo_base_addr, lo_val); WREG32_PCIE(hi_base_addr, hi_val); return ret; } - -/* - * start xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @is_enable -> either resume or assign event via df perfmon - * - */ - -static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - int is_enable) +static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, + int is_enable) { uint32_t lo_base_addr, hi_base_addr, lo_val; - uint64_t config; - int ret; - - if (instance < 0 || instance > 1) - return -EINVAL; - - if (is_enable) { + int ret = 0; - ret = df_v3_6_add_xgmi_link_cntr(adev, instance); - - if (ret) - return ret; + switch (adev->asic_type) { + case CHIP_VEGA20: - } else { + df_v3_6_reset_perfmon_cntr(adev, config); - config = GET_INSTANCE_CONFIG(instance); + if (is_enable) { + ret = df_v3_6_add_perfmon_cntr(adev, config); + } else { + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); - df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - NULL, - NULL); + if (ret) + return ret; - if (lo_base_addr == 0) - return -EINVAL; + lo_val = RREG32_PCIE(lo_base_addr); - lo_val = RREG32_PCIE(lo_base_addr); + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", + config, lo_base_addr, hi_base_addr, lo_val); - WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + } - ret = 0; + break; + default: + break; } return ret; - } -/* - * start xgmi link counters - * - * @adev -> amdgpu device - * @instance-> currently cake has 2 links to poll on vega20 - * @is_enable -> either pause or unassign event via df perfmon - * - */ - -static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev, - int instance, - int is_disable) +static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, + int is_disable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; - uint64_t config; - - config = GET_INSTANCE_CONFIG(instance); - - if (is_disable) { - df_v3_6_reset_xgmi_link_cntr(adev, instance); - df_v3_6_pmc_release_cntr(adev, config); - } else { - - df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - NULL, - NULL); - - if ((lo_base_addr == 0) || (hi_base_addr == 0)) - return -EINVAL; - - lo_val = RREG32_PCIE(lo_base_addr); - - WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); - } - - return 0; -} - -static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, - int is_enable) -{ - int xgmi_tx_link, ret = 0; + int ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); - - if (xgmi_tx_link >= 0) - ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link, - is_enable); + ret = df_v3_6_pmc_get_ctrl_settings(adev, + config, + &lo_base_addr, + &hi_base_addr, + NULL, + NULL); if (ret) return ret; - ret = 0; - break; - default: - break; - } + lo_val = RREG32_PCIE(lo_base_addr); - return ret; -} + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", + config, lo_base_addr, hi_base_addr, lo_val); -static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, - int is_disable) -{ - int xgmi_tx_link, ret = 0; + WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); - switch (adev->asic_type) { - case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); - - if (xgmi_tx_link >= 0) { - ret = df_v3_6_stop_xgmi_link_cntr(adev, - xgmi_tx_link, - is_disable); - if (ret) - return ret; - } - - ret = 0; - break; + if (is_disable) + df_v3_6_pmc_release_cntr(adev, config); + + break; default: break; } @@ -510,28 +459,38 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, uint64_t config, uint64_t *count) { - - int xgmi_tx_link; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; + *count = 0; switch (adev->asic_type) { case CHIP_VEGA20: - xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0 - : (IS_DF_XGMI_1_TX(config) ? 1 : -1); - if (xgmi_tx_link >= 0) { - df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link); - df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count); - } + df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + &hi_base_addr); + + if ((lo_base_addr == 0) || (hi_base_addr == 0)) + return; + + lo_val = RREG32_PCIE(lo_base_addr); + hi_val = RREG32_PCIE(hi_base_addr); + + *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); + + if (*count >= DF_V3_6_PERFMON_OVERFLOW) + *count = 0; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; + default: break; } - } const struct amdgpu_df_funcs df_v3_6_funcs = { - .init = df_v3_6_init, + .sw_init = df_v3_6_sw_init, .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, .get_fb_channel_number = df_v3_6_get_fb_channel_number, .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index fcffd807764d..76998541bc30 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -36,22 +36,15 @@ enum DF_V3_6_MGCG { }; /* Defined in global_features.h as FTI_PERFMON_VISIBLE */ -#define AMDGPU_DF_MAX_COUNTERS 4 +#define DF_V3_6_MAX_COUNTERS 4 /* get flags from df perfmon config */ -#define GET_EVENT(x) (x & 0xFFUL) -#define GET_INSTANCE(x) ((x >> 8) & 0xFFUL) -#define GET_UNITMASK(x) ((x >> 16) & 0xFFUL) -#define GET_INSTANCE_CONFIG(x) (0ULL | (0x07ULL) \ - | ((0x046ULL + x) << 8) \ - | (0x02 << 16)) - -/* df event conf macros */ -#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \ - && GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2) -#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \ - && GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2) +#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL) +#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL +extern const struct attribute_group *df_v3_6_attr_groups[]; extern const struct amdgpu_df_funcs df_v3_6_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c new file mode 100644 index 000000000000..1675d5837c3c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -0,0 +1,5216 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "nv.h" +#include "nvd.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "navi10_enum.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "clearstate_gfx10.h" +#include "v10_structs.h" +#include "gfx_v10_0.h" +#include "nbio_v2_3.h" + +/** + * Navi10 has two graphic rings to share each graphic pipe. + * 1. Primary ring + * 2. Async ring + * + * In bring-up phase, it just used primary ring so set gfx ring number as 1 at + * first. + */ +#define GFX10_NUM_GFX_RINGS 2 +#define GFX10_MEC_HPD_SIZE 2048 + +#define F32_CE_PROGRAM_RAM_SIZE 65536 +#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L + +MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi10_me.bin"); +MODULE_FIRMWARE("amdgpu/navi10_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_10_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xfeff8fff, 0xfeff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x000007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0x20000000, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07900000, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CGTT_CLK_CTRL, 0xfeff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = +{ + /* Pending on emulation bring up */ +}; + +#define DEFAULT_SH_MEM_CONFIG \ + ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ + (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \ + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) + + +static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); +static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance); +static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); + +static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev); +static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev); +static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); +static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); +static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); + +static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) +{ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ +} + +static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +} + +static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + enum amdgpu_unmap_queues_action action, + u64 gpu_addr, u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_UNMAP_QUEUES_ACTION(action) | + PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | + PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | + PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + + if (action == PREEMPT_QUEUES_NO_UNMAP) { + amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(kiq_ring, seq); + } else { + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } +} + +static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring, + u64 addr, + u64 seq) +{ + uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); + amdgpu_ring_write(kiq_ring, + PACKET3_QUERY_STATUS_CONTEXT_ID(0) | + PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | + PACKET3_QUERY_STATUS_COMMAND(2)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); + amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); + amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); +} + +static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { + .kiq_set_resources = gfx10_kiq_set_resources, + .kiq_map_queues = gfx10_kiq_map_queues, + .kiq_unmap_queues = gfx10_kiq_unmap_queues, + .kiq_query_status = gfx10_kiq_query_status, + .set_resources_size = 8, + .map_queues_size = 7, + .unmap_queues_size = 6, + .query_status_size = 7, +}; + +static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) +{ + adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs; +} + +static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_0_nv10, + (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); + break; + default: + break; + } +} + +static void gfx_v10_0_scratch_init(struct amdgpu_device *adev) +{ + adev->gfx.scratch.num_reg = 8; + adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; +} + +static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + return r; + } + + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + DRM_UDELAY(1); + } + if (i < adev->usec_timeout) { + if (amdgpu_emu_mode == 1) + DRM_INFO("ring test on %d succeeded in %d msecs\n", + ring->idx, i); + else + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", + ring->idx, scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; +} + +static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + uint32_t scratch; + uint32_t tmp = 0; + long r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + return r; + } + + WREG32(scratch, 0xCAFEDEAD); + + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); + ib.ptr[2] = 0xDEADBEEF; + ib.length_dw = 3; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err2; + } + + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } +err2: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + amdgpu_gfx_scratch_free(adev, scratch); + + return r; +} + +static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + + kfree(adev->gfx.rlc.register_list_format); +} + +static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + +static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; + default: + break; + } +} + +static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.pfp_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.me_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.ce_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v10_0_init_rlc_ext_microcode(adev); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + if (adev->gfx.rlc.is_rlc_v2_1 && + adev->gfx.rlc.save_restore_list_cntl_size_bytes && + adev->gfx.rlc.save_restore_list_gpm_size_bytes && + adev->gfx.rlc.save_restore_list_srm_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; + info->fw = adev->gfx.mec_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; + info->fw = adev->gfx.mec_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + if (adev->gfx.mec2_fw) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; + info->fw = adev->gfx.mec2_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } + +out: + if (err) { + dev_err(adev->dev, + "gfx10: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + } + + gfx_v10_0_check_gfxoff_flag(adev); + + return err; +} + +static u32 gfx_v10_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + + /* set PA_SC_TILE_STEERING_OVERRIDE */ + count += 3; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static void gfx_v10_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int ctx_reg_offset; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + ctx_reg_offset = + SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + buffer[count++] = cpu_to_le32(ctx_reg_offset); + buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} + +static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) +{ + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = gfx10_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) + return r; + } + + return 0; +} + +static int gfx_v10_0_csb_vram_pin(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, + AMDGPU_GEM_DOMAIN_VRAM); + if (!r) + adev->gfx.rlc.clear_state_gpu_addr = + amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return r; +} + +static void gfx_v10_0_csb_vram_unpin(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.rlc.clear_state_obj) + return; + + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); + if (likely(r == 0)) { + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } +} + +static void gfx_v10_0_mec_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); +} + +static int gfx_v10_0_me_init(struct amdgpu_device *adev) +{ + int r; + + bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); + + amdgpu_gfx_graphics_queue_acquire(adev); + + r = gfx_v10_0_init_microcode(adev); + if (r) + DRM_ERROR("Failed to load gfx firmware!\n"); + + return r; +} + +static int gfx_v10_0_mec_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + const __le32 *fw_data = NULL; + unsigned fw_size; + u32 *fw = NULL; + size_t mec_hpd_size; + + const struct gfx_firmware_header_v1_0 *mec_hdr = NULL; + + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE; + + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + gfx_v10_0_mec_fini(adev); + return r; + } + + memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); + if (r) { + dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); + gfx_v10_0_mec_fini(adev); + return r; + } + + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + } + + return 0; +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) +{ + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT)); + return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, + uint32_t thread, uint32_t regno, + uint32_t num, uint32_t *out) +{ + WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); +} + +static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +{ + /* in gfx10 the SIMD_ID is specified as part of the INSTANCE + * field when performing a select_se_sh so it should be + * zero here */ + WARN_ON(simd != 0); + + /* type 2 wave data */ + dst[(*no_fields)++] = 2; + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); + dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); +} + +static void gfx_v10_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + WARN_ON(simd != 0); + + wave_read_regs( + adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, + dst); +} + +static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t start, uint32_t size, + uint32_t *dst) +{ + wave_read_regs( + adev, wave, thread, + start + SQIND_WAVE_VGPRS_OFFSET, size, dst); +} + +static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 q, u32 vm) + { + nv_grbm_select(adev, me, pipe, q, vm); + } + + +static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v10_0_select_se_sh, + .read_wave_data = &gfx_v10_0_read_wave_data, + .read_wave_sgprs = &gfx_v10_0_read_wave_sgprs, + .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs, + .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, +}; + +static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v10_0_gfx_funcs; + + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_PIPES); + + adev->gfx.config.max_tile_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD(adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); +} + +static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, + int me, int pipe, int queue) +{ + int r; + struct amdgpu_ring *ring; + unsigned int irq_type; + + ring = &adev->gfx.gfx_ring[ring_id]; + + ring->me = me; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + + if (!ring_id) + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; + sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + return 0; +} + +static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX10_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + return 0; +} + +static int gfx_v10_0_sw_init(void *handle) +{ + int i, j, k, r, ring_id = 0; + struct amdgpu_kiq *kiq; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 2; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + default: + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + break; + } + + /* KIQ event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_IB2_INTERRUPT_PKT, + &adev->gfx.kiq.irq); + if (r) + return r; + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, + GFX_10_1__SRCID__CP_EOP_INTERRUPT, + &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_REG_FAULT, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_10_1__SRCID__CP_PRIV_INSTR_FAULT, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + gfx_v10_0_scratch_init(adev); + + r = gfx_v10_0_me_init(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + + r = gfx_v10_0_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.me.num_me; i++) { + for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v10_0_gfx_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + ring_id++; + } + } + } + + ring_id = 0; + /* set up the compute queues - allocate horizontally across pipes */ + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, + j)) + continue; + + r = gfx_v10_0_compute_ring_init(adev, ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } + } + } + + r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; + + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); + if (r) + return r; + + /* allocate visible FB for rlc auto-loading fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev); + if (r) + return r; + } + + adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE; + + gfx_v10_0_gpu_early_init(adev); + + return 0; +} + +static void gfx_v10_0_pfp_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); +} + +static void gfx_v10_0_ce_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.ce.ce_fw_obj, + &adev->gfx.ce.ce_fw_gpu_addr, + (void **)&adev->gfx.ce.ce_fw_ptr); +} + +static void gfx_v10_0_me_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); +} + +static int gfx_v10_0_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); + + gfx_v10_0_pfp_fini(adev); + gfx_v10_0_ce_fini(adev); + gfx_v10_0_me_fini(adev); + gfx_v10_0_rlc_fini(adev); + gfx_v10_0_mec_fini(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) + gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); + + gfx_v10_0_free_microcode(adev); + + return 0; +} + + +static void gfx_v10_0_tiling_mode_table_init(struct amdgpu_device *adev) +{ + /* TODO */ +} + +static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance) +{ + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, + instance); + + if (se_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + + if (sh_num == 0xffffffff) + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, + 1); + else + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); + + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); +} + +static u32 gfx_v10_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); + data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); + + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); + + return (~data) & mask; +} + +static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) +{ + int i, j; + u32 data; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + data = gfx_v10_0_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); + } + } + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); +} + +static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *adev) +{ + uint32_t num_sc; + uint32_t enabled_rb_per_sh; + uint32_t active_rb_bitmap; + uint32_t num_rb_per_sc; + uint32_t num_packer_per_sc; + uint32_t pa_sc_tile_steering_override; + + /* init num_sc */ + num_sc = adev->gfx.config.max_shader_engines * adev->gfx.config.max_sh_per_se * + adev->gfx.config.num_sc_per_sh; + /* init num_rb_per_sc */ + active_rb_bitmap = gfx_v10_0_get_rb_active_bitmap(adev); + enabled_rb_per_sh = hweight32(active_rb_bitmap); + num_rb_per_sc = enabled_rb_per_sh / adev->gfx.config.num_sc_per_sh; + /* init num_packer_per_sc */ + num_packer_per_sc = adev->gfx.config.num_packer_per_sc; + + pa_sc_tile_steering_override = 0; + pa_sc_tile_steering_override |= + (order_base_2(num_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_SC_MASK; + pa_sc_tile_steering_override |= + (order_base_2(num_rb_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SC_MASK; + pa_sc_tile_steering_override |= + (order_base_2(num_packer_per_sc) << PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC__SHIFT) & + PA_SC_TILE_STEERING_OVERRIDE__NUM_PACKER_PER_SC_MASK; + + return pa_sc_tile_steering_override; +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) + +static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + nv_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); + } + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev) +{ + int i, j, k; + int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1; + u32 tmp, wgp_active_bitmap = 0; + u32 gcrd_targets_disable_tcp = 0; + u32 utcl_invreq_disable = 0; + /* + * GCRD_TARGETS_DISABLE field contains + * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0] + */ + u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask( + 2 * max_wgp_per_sh + /* TCP */ + max_wgp_per_sh + /* SQC */ + 4); /* GL1C */ + /* + * UTCL1_UTCL0_INVREQ_DISABLE field contains + * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] + */ + u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask( + 2 * max_wgp_per_sh + /* TCP */ + 2 * max_wgp_per_sh + /* SQC */ + 4 + /* RMI */ + 1); /* SQG */ + + if (adev->asic_type == CHIP_NAVI10) { + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + /* + * Set corresponding TCP bits for the inactive WGPs in + * GCRD_SA_TARGETS_DISABLE + */ + gcrd_targets_disable_tcp = 0; + /* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */ + utcl_invreq_disable = 0; + + for (k = 0; k < max_wgp_per_sh; k++) { + if (!(wgp_active_bitmap & (1 << k))) { + gcrd_targets_disable_tcp |= 3 << (2 * k); + utcl_invreq_disable |= (3 << (2 * k)) | + (3 << (2 * (max_wgp_per_sh + k))); + } + } + + tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE); + /* only override TCP & SQC bits */ + tmp &= 0xffffffff << (4 * max_wgp_per_sh); + tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask); + WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE); + /* only override TCP bits */ + tmp &= 0xffffffff << (2 * max_wgp_per_sh); + tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask); + WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp); + } + } + + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } +} + +static void gfx_v10_0_constants_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + + gfx_v10_0_tiling_mode_table_init(adev); + + gfx_v10_0_setup_rb(adev); + gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.pa_sc_tile_steering_override = + gfx_v10_0_init_pa_sc_tile_steering_override(adev); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + nv_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); + if (i != 0) { + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + } + } + nv_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v10_0_init_compute_vmid(adev); + +} + +static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, + enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, + enable ? 1 : 0); + + WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); +} + +static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); +} + +static void gfx_v10_0_init_pg(struct amdgpu_device *adev) +{ + gfx_v10_0_init_csb(adev); + + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + + /* TODO: init power gating */ + return; +} + +void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp); +} + +static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, + bool enable) +{ + uint32_t rlc_pg_cntl; + + rlc_pg_cntl = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); + + if (!enable) { + /* RLC_PG_CNTL[23] = 0 (default) + * RLC will wait for handshake acks with SMU + * GFXOFF will be enabled + * RLC_PG_CNTL[23] = 1 + * RLC will not issue any message to SMU + * hence no handshake between SMU & RLC + * GFXOFF will be disabled + */ + rlc_pg_cntl |= 0x80000; + } else + rlc_pg_cntl &= ~0x80000; + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl); +} + +static void gfx_v10_0_rlc_start(struct amdgpu_device *adev) +{ + /* TODO: enable rlc & smu handshake until smu + * and gfxoff feature works as expected */ + if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) + gfx_v10_0_rlc_smu_handshake_cntl(adev, false); + + WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); +} + +static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* enable Save Restore Machine */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); +} + +static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, + RLCG_UCODE_LOADING_START_ADDRESS); + + for (i = 0; i < fw_size; i++) + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, + le32_to_cpup(fw_data++)); + + WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); + + return 0; +} + +static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + if (amdgpu_sriov_vf(adev)) + return 0; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); + if (r) + return r; + gfx_v10_0_init_pg(adev); + + /* enable RLC SRM */ + gfx_v10_0_rlc_enable_srm(adev); + + } else { + adev->gfx.rlc.funcs->stop(adev); + + /* disable CG */ + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); + + /* disable PG */ + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy rlc firmware loading */ + r = gfx_v10_0_rlc_load_microcode(adev); + if (r) + return r; + } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + /* rlc backdoor autoload firmware */ + r = gfx_v10_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; + } + + gfx_v10_0_init_pg(adev); + adev->gfx.rlc.funcs->start(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); + if (r) + return r; + } + } + return 0; +} + +static struct { + FIRMWARE_ID id; + unsigned int offset; + unsigned int size; +} rlc_autoload_info[FIRMWARE_ID_MAX]; + +static int gfx_v10_0_parse_rlc_toc(struct amdgpu_device *adev) +{ + int ret; + RLC_TABLE_OF_CONTENT *rlc_toc; + + ret = amdgpu_bo_create_reserved(adev, adev->psp.toc_bin_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.rlc.rlc_toc_bo, + &adev->gfx.rlc.rlc_toc_gpu_addr, + (void **)&adev->gfx.rlc.rlc_toc_buf); + if (ret) { + dev_err(adev->dev, "(%d) failed to create rlc toc bo\n", ret); + return ret; + } + + /* Copy toc from psp sos fw to rlc toc buffer */ + memcpy(adev->gfx.rlc.rlc_toc_buf, adev->psp.toc_start_addr, adev->psp.toc_bin_size); + + rlc_toc = (RLC_TABLE_OF_CONTENT *)adev->gfx.rlc.rlc_toc_buf; + while (rlc_toc && (rlc_toc->id > FIRMWARE_ID_INVALID) && + (rlc_toc->id < FIRMWARE_ID_MAX)) { + if ((rlc_toc->id >= FIRMWARE_ID_CP_CE) && + (rlc_toc->id <= FIRMWARE_ID_CP_MES)) { + /* Offset needs 4KB alignment */ + rlc_toc->offset = ALIGN(rlc_toc->offset * 4, PAGE_SIZE); + } + + rlc_autoload_info[rlc_toc->id].id = rlc_toc->id; + rlc_autoload_info[rlc_toc->id].offset = rlc_toc->offset * 4; + rlc_autoload_info[rlc_toc->id].size = rlc_toc->size * 4; + + rlc_toc++; + }; + + return 0; +} + +static uint32_t gfx_v10_0_calc_toc_total_size(struct amdgpu_device *adev) +{ + uint32_t total_size = 0; + FIRMWARE_ID id; + int ret; + + ret = gfx_v10_0_parse_rlc_toc(adev); + if (ret) { + dev_err(adev->dev, "failed to parse rlc toc\n"); + return 0; + } + + for (id = FIRMWARE_ID_RLC_G_UCODE; id < FIRMWARE_ID_MAX; id++) + total_size += rlc_autoload_info[id].size; + + /* In case the offset in rlc toc ucode is aligned */ + if (total_size < rlc_autoload_info[FIRMWARE_ID_MAX-1].offset) + total_size = rlc_autoload_info[FIRMWARE_ID_MAX-1].offset + + rlc_autoload_info[FIRMWARE_ID_MAX-1].size; + + return total_size; +} + +static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev) +{ + int r; + uint32_t total_size; + + total_size = gfx_v10_0_calc_toc_total_size(adev); + + r = amdgpu_bo_create_reserved(adev, total_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); + return r; + } + + return 0; +} + +static void gfx_v10_0_rlc_backdoor_autoload_buffer_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_toc_bo, + &adev->gfx.rlc.rlc_toc_gpu_addr, + (void **)&adev->gfx.rlc.rlc_toc_buf); + amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, + &adev->gfx.rlc.rlc_autoload_gpu_addr, + (void **)&adev->gfx.rlc.rlc_autoload_ptr); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, + FIRMWARE_ID id, + const void *fw_data, + uint32_t fw_size) +{ + uint32_t toc_offset; + uint32_t toc_fw_size; + char *ptr = adev->gfx.rlc.rlc_autoload_ptr; + + if (id <= FIRMWARE_ID_INVALID || id >= FIRMWARE_ID_MAX) + return; + + toc_offset = rlc_autoload_info[id].offset; + toc_fw_size = rlc_autoload_info[id].size; + + if (fw_size == 0) + fw_size = toc_fw_size; + + if (fw_size > toc_fw_size) + fw_size = toc_fw_size; + + memcpy(ptr + toc_offset, fw_data, fw_size); + + if (fw_size < toc_fw_size) + memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) +{ + void *data; + uint32_t size; + + data = adev->gfx.rlc.rlc_toc_buf; + size = rlc_autoload_info[FIRMWARE_ID_RLC_TOC].size; + + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_RLC_TOC, + data, size); +} + +static void gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + + /* pfp ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_PFP, + fw_data, fw_size); + + /* ce ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + fw_data = (const __le32 *)(adev->gfx.ce_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_CE, + fw_data, fw_size); + + /* me ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_ME, + fw_data, fw_size); + + /* rlc ucode */ + rlc_hdr = (const struct rlc_firmware_header_v2_0 *) + adev->gfx.rlc_fw->data; + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_RLC_G_UCODE, + fw_data, fw_size); + + /* mec1 ucode */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec_fw->data; + fw_data = (const __le32 *) (adev->gfx.mec_fw->data + + le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - + cp_hdr->jt_size * 4; + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_CP_MEC, + fw_data, fw_size); + /* mec2 ucode is not necessary if mec2 ucode is same as mec1 */ +} + +/* Temporarily put sdma part here */ +static void gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + uint32_t fw_size; + const struct sdma_firmware_header_v1_0 *sdma_hdr; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma_hdr = (const struct sdma_firmware_header_v1_0 *) + adev->sdma.instance[i].fw->data; + fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + + le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(sdma_hdr->header.ucode_size_bytes); + + if (i == 0) { + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA0_UCODE, fw_data, fw_size); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA0_JT, + (uint32_t *)fw_data + + sdma_hdr->jt_offset, + sdma_hdr->jt_size * 4); + } else if (i == 1) { + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA1_UCODE, fw_data, fw_size); + gfx_v10_0_rlc_backdoor_autoload_copy_ucode(adev, + FIRMWARE_ID_SDMA1_JT, + (uint32_t *)fw_data + + sdma_hdr->jt_offset, + sdma_hdr->jt_size * 4); + } + } +} + +static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) +{ + uint32_t rlc_g_offset, rlc_g_size, tmp; + uint64_t gpu_addr; + + gfx_v10_0_rlc_backdoor_autoload_copy_toc_ucode(adev); + gfx_v10_0_rlc_backdoor_autoload_copy_sdma_ucode(adev); + gfx_v10_0_rlc_backdoor_autoload_copy_gfx_ucode(adev); + + rlc_g_offset = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].offset; + rlc_g_size = rlc_autoload_info[FIRMWARE_ID_RLC_G_UCODE].size; + gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; + + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_HI, upper_32_bits(gpu_addr)); + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_ADDR_LO, lower_32_bits(gpu_addr)); + WREG32_SOC15(GC, 0, mmRLC_HYP_BOOTLOAD_SIZE, rlc_g_size); + + tmp = RREG32_SOC15(GC, 0, mmRLC_HYP_RESET_VECTOR); + if (!(tmp & (RLC_HYP_RESET_VECTOR__COLD_BOOT_EXIT_MASK | + RLC_HYP_RESET_VECTOR__VDDGFX_EXIT_MASK))) { + DRM_ERROR("Neither COLD_BOOT_EXIT nor VDDGFX_EXIT is set\n"); + return -EINVAL; + } + + tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); + if (tmp & RLC_CNTL__RLC_ENABLE_F32_MASK) { + DRM_ERROR("RLC ROM should halt itself\n"); + return -EINVAL; + } + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_me_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program me ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_ME].offset; + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program ce ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_CE].offset; + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program pfp ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_PFP].offset; + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(struct amdgpu_device *adev) +{ + uint32_t usec_timeout = 50000; /* wait for 50ms */ + uint32_t tmp; + int i; + uint64_t addr; + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + /* Program mec1 ucode address into intruction cache address register */ + addr = adev->gfx.rlc.rlc_autoload_gpu_addr + + rlc_autoload_info[FIRMWARE_ID_CP_MEC].offset; + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, + lower_32_bits(addr) & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, + upper_32_bits(addr)); + + return 0; +} + +static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) +{ + uint32_t cp_status; + uint32_t bootload_status; + int i, r; + + for (i = 0; i < adev->usec_timeout; i++) { + cp_status = RREG32_SOC15(GC, 0, mmCP_STAT); + bootload_status = RREG32_SOC15(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS); + if ((cp_status == 0) && + (REG_GET_FIELD(bootload_status, + RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); + return -ETIMEDOUT; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { + r = gfx_v10_0_rlc_backdoor_autoload_config_me_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_ce_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_pfp_cache(adev); + if (r) + return r; + + r = gfx_v10_0_rlc_backdoor_autoload_config_mec_cache(adev); + if (r) + return r; + } + + return 0; +} + +static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); + + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); + if (!enable) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].sched.ready = false; + } + WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); + udelay(50); +} + +static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *pfp_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + pfp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.pfp.pfp_fw_obj, + &adev->gfx.pfp.pfp_fw_gpu_addr, + (void **)&adev->gfx.pfp.pfp_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); + gfx_v10_0_pfp_fini(adev); + return r; + } + + memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); + amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio_funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL, tmp); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_LO, + adev->gfx.pfp.pfp_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_HI, + upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *ce_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + ce_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.ce_fw->data + + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, ce_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.ce.ce_fw_obj, + &adev->gfx.ce.ce_fw_gpu_addr, + (void **)&adev->gfx.ce.ce_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create ce fw bo\n", r); + gfx_v10_0_ce_fini(adev); + return r; + } + + memcpy(adev->gfx.ce.ce_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.ce.ce_fw_obj); + amdgpu_bo_unreserve(adev->gfx.ce.ce_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CE_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio_funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_LO, + adev->gfx.ce.ce_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_HI, + upper_32_bits(adev->gfx.ce.ce_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) +{ + int r; + const struct gfx_firmware_header_v1_0 *me_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + uint32_t tmp; + uint32_t usec_timeout = 50000; /* wait for 50ms */ + + me_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + + fw_data = (const __le32 *)(adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.me.me_fw_obj, + &adev->gfx.me.me_fw_gpu_addr, + (void **)&adev->gfx.me.me_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); + gfx_v10_0_me_fini(adev); + return r; + } + + memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); + amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio_funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_LO, + adev->gfx.me.me_fw_gpu_addr & 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_HI, + upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); + + return 0; +} + +static int gfx_v10_0_cp_gfx_load_microcode(struct amdgpu_device *adev) +{ + int r; + + if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) + return -EINVAL; + + gfx_v10_0_cp_gfx_enable(adev, false); + + r = gfx_v10_0_cp_gfx_load_pfp_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); + return r; + } + + r = gfx_v10_0_cp_gfx_load_ce_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load ce fw\n", r); + return r; + } + + r = gfx_v10_0_cp_gfx_load_me_microcode(adev); + if (r) { + dev_err(adev->dev, "(%d) failed to load me fw\n", r); + return r; + } + + return 0; +} + +static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int r, i; + int ctx_reg_offset; + + /* init the CP */ + WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, + adev->gfx.config.max_hw_contexts - 1); + WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); + + gfx_v10_0_cp_gfx_enable(adev, true); + + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_alloc(ring, gfx_v10_0_get_csb_size(adev) + 4); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, 0x80000000); + amdgpu_ring_write(ring, 0x80000000); + + for (sect = gfx10_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + amdgpu_ring_write(ring, + PACKET3(PACKET3_SET_CONTEXT_REG, + ext->reg_count)); + amdgpu_ring_write(ring, ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + amdgpu_ring_write(ring, ext->extent[i]); + } + } + } + + ctx_reg_offset = + SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); + amdgpu_ring_write(ring, ctx_reg_offset); + amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, 0x8000); + + amdgpu_ring_commit(ring); + + /* submit cs packet to copy state 0 to next available state */ + ring = &adev->gfx.gfx_ring[1]; + r = amdgpu_ring_alloc(ring, 2); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_commit(ring); + + return 0; +} + +static void gfx_v10_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, + CP_PIPE_ID pipe) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); + + WREG32_SOC15(GC, 0, mmGRBM_GFX_CNTL, tmp); +} + +static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + u32 tmp; + + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} + +static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr, rptr_addr, wptr_gpu_addr; + u32 i; + + /* Set the write pointer delay */ + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); + + /* set the RB to use vmid 0 */ + WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); + + /* Init gfx ring 0 for pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + mutex_unlock(&adev->srbm_mutex); + /* Set ring buffer size */ + ring = &adev->gfx.gfx_ring[0]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); +#endif + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); + + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + + /* set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); + + WREG32_SOC15(GC, 0, mmCP_RB_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + /* Init gfx ring 1 for pipe 1 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); + mutex_unlock(&adev->srbm_mutex); + ring = &adev->gfx.gfx_ring[1]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); + /* Set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + /* Switch to pipe 0 */ + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); + mutex_unlock(&adev->srbm_mutex); + + /* start the ring */ + gfx_v10_0_cp_gfx_start(adev); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->sched.ready = true; + } + + return 0; +} + +static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + + if (enable) { + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0); + } else { + WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; + } + udelay(50); +} + +static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + u32 usec_timeout = 50000; /* Wait for 50 ms */ + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v10_0_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + + /* Trigger an invalidation of the L1 instruction caches */ + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL, tmp); + + /* Wait for invalidation complete */ + for (i = 0; i < usec_timeout; i++) { + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_OP_CNTL); + if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, + INVALIDATE_CACHE_COMPLETE)) + break; + udelay(1); + } + + if (i >= usec_timeout) { + dev_err(adev->dev, "failed to invalidate instruction cache\n"); + return -EINVAL; + } + + if (amdgpu_emu_mode == 1) + adev->nbio_funcs->hdp_flush(adev, NULL); + + tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); + + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr & + 0xFFFFF000); + WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + /* MEC1 */ + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 0); + + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); + + /* + * TODO: Loading MEC2 firmware is only necessary if MEC2 should run + * different microcode than MEC1. + */ + + return 0; +} + +static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); +} + +static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr; + uint32_t tmp; + uint32_t rb_bufsz; + + /* set up gfx hqd wptr */ + mqd->cp_gfx_hqd_wptr = 0; + mqd->cp_gfx_hqd_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set up mqd control */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); + mqd->cp_gfx_mqd_control = tmp; + + /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); + mqd->cp_gfx_hqd_vmid = 0; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); + mqd->cp_gfx_hqd_queue_priority = tmp; + + /* set up time quantum */ + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); + mqd->cp_gfx_hqd_quantum = tmp; + + /* set up gfx hqd base. this is similar as CP_RB_BASE */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_gfx_hqd_base = hqd_gpu_addr; + mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; + mqd->cp_gfx_hqd_rptr_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up rb_wptr_poll addr */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ + rb_bufsz = order_base_2(ring->ring_size / 4) - 1; + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); +#endif + mqd->cp_gfx_hqd_cntl = tmp; + + /* set up cp_doorbell_control */ + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + mqd->cp_rb_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); + + /* active the queue */ + mqd->cp_gfx_hqd_active = 1; + + return 0; +} + +#ifdef BRING_UP_DEBUG +static int gfx_v10_0_gfx_queue_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + + /* set mmCP_GFX_HQD_WPTR/_HI to 0 */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR, mqd->cp_gfx_hqd_wptr); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_WPTR_HI, mqd->cp_gfx_hqd_wptr_hi); + + /* set GFX_MQD_BASE */ + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + + /* set GFX_MQD_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_GFX_MQD_CONTROL, mqd->cp_gfx_mqd_control); + + /* set GFX_HQD_VMID to 0 */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_VMID, mqd->cp_gfx_hqd_vmid); + + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY, + mqd->cp_gfx_hqd_queue_priority); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM, mqd->cp_gfx_hqd_quantum); + + /* set GFX_HQD_BASE, similar as CP_RB_BASE */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE, mqd->cp_gfx_hqd_base); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_BASE_HI, mqd->cp_gfx_hqd_base_hi); + + /* set GFX_HQD_RPTR_ADDR, similar as CP_RB_RPTR */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR, mqd->cp_gfx_hqd_rptr_addr); + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR_ADDR_HI, mqd->cp_gfx_hqd_rptr_addr_hi); + + /* set GFX_HQD_CNTL, similar as CP_RB_CNTL */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_CNTL, mqd->cp_gfx_hqd_cntl); + + /* set RB_WPTR_POLL_ADDR */ + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, mqd->cp_rb_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, mqd->cp_rb_wptr_poll_addr_hi); + + /* set RB_DOORBELL_CONTROL */ + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, mqd->cp_rb_doorbell_control); + + /* active the queue */ + WREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE, mqd->cp_gfx_hqd_active); + + return 0; +} +#endif + +static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_gfx_mqd *mqd = ring->mqd_ptr; + + if (!adev->in_gpu_reset && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_gfx_mqd_init(ring); +#ifdef BRING_UP_DEBUG + gfx_v10_0_gfx_queue_init_register(ring); +#endif + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) + memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + } else if (adev->in_gpu_reset) { + /* reset mqd with the backup copy */ + if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) + memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + /* reset the ring */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); +#ifdef BRING_UP_DEBUG + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_gfx_queue_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +#endif + } else { + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +#ifndef BRING_UP_DEBUG +static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + int r, i; + + if (!kiq->pmf || !kiq->pmf->kiq_map_queues) + return -EINVAL; + + r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * + adev->gfx.num_gfx_rings); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + return r; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]); + + r = amdgpu_ring_test_ring(kiq_ring); + if (r) { + DRM_ERROR("kfq enable failed\n"); + kiq_ring->sched.ready = false; + } + return r; +} +#endif + +static int gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_gfx_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } +#ifndef BRING_UP_DEBUG + r = gfx_v10_0_kiq_enable_kgq(adev); + if (r) + goto done; +#endif + r = gfx_v10_0_cp_gfx_start(adev); + if (r) + goto done; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->sched.ready = true; + } +done: + return r; +} + +static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(GFX10_MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } else { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* set MIN_IB_AVAIL_SIZE */ + tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + mqd->cp_hqd_ib_control = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int j; + + /* disable wptr polling */ + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + /* write the EOP addr */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, + mqd->cp_hqd_eop_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, + mqd->cp_hqd_dequeue_request); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, + mqd->cp_hqd_pq_rptr); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, + mqd->cp_mqd_base_addr_lo); + WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, + mqd->cp_hqd_pq_base_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, + (adev->doorbell_index.userqueue_end * 2) << 2); + } + + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, + mqd->cp_hqd_pq_wptr_lo); + WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; + + gfx_v10_0_kiq_setting(ring); + + if (adev->in_gpu_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_kiq_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_compute_mqd_init(ring); + gfx_v10_0_kiq_init_register(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } + + return 0; +} + +static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_compute_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!adev->in_gpu_reset && !adev->in_suspend) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v10_0_compute_mqd_init(ring); + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + } else { + amdgpu_ring_clear_ring(ring); + } + + return 0; +} + +static int gfx_v10_0_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->gfx.kiq.ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + return r; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (unlikely(r != 0)) + return r; + + gfx_v10_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + amdgpu_bo_unreserve(ring->mqd_obj); + ring->sched.ready = true; + return 0; +} + +static int gfx_v10_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v10_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v10_0_kcq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + + r = amdgpu_gfx_enable_kcq(adev); +done: + return r; +} + +static int gfx_v10_0_cp_resume(struct amdgpu_device *adev) +{ + int r, i; + struct amdgpu_ring *ring; + + if (!(adev->flags & AMD_IS_APU)) + gfx_v10_0_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /* legacy firmware loading */ + r = gfx_v10_0_cp_gfx_load_microcode(adev); + if (r) + return r; + + r = gfx_v10_0_cp_compute_load_microcode(adev); + if (r) + return r; + } + + r = gfx_v10_0_kiq_resume(adev); + if (r) + return r; + + r = gfx_v10_0_kcq_resume(adev); + if (r) + return r; + + if (!amdgpu_async_gfx_ring) { + r = gfx_v10_0_cp_gfx_resume(adev); + if (r) + return r; + } else { + r = gfx_v10_0_cp_async_gfx_ring_resume(adev); + if (r) + return r; + } + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + DRM_INFO("gfx %d ring me %d pipe %d q %d\n", + i, ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + return r; + } + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + ring->sched.ready = true; + DRM_INFO("compute ring %d mec %d pipe %d q %d\n", + i, ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_test_ring(ring); + if (r) + ring->sched.ready = false; + } + + return 0; +} + +static void gfx_v10_0_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v10_0_cp_gfx_enable(adev, enable); + gfx_v10_0_cp_compute_enable(adev, enable); +} + +static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) +{ + uint32_t data, pattern = 0xDEADBEEF; + + /* check if mmVGT_ESGS_RING_SIZE_UMD + * has been remapped to mmVGT_ESGS_RING_SIZE */ + data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); + + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); + + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); + + if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); + return true; + } else { + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); + return false; + } +} + +static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) +{ + uint32_t data; + + /* initialize cam_index to 0 + * index will auto-inc after each data writting */ + WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); + + /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_UMD -> mmVGT_TF_MEMORY_BASE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_TF_MEMORY_BASE_HI_UMD -> mmVGT_TF_MEMORY_BASE_HI */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_MEMORY_BASE_HI) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_HS_OFFCHIP_PARAM_UMD -> mmVGT_HS_OFFCHIP_PARAM */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_HS_OFFCHIP_PARAM) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_ESGS_RING_SIZE_UMD -> mmVGT_ESGS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_ESGS_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmVGT_GSVS_RING_SIZE_UMD -> mmVGT_GSVS_RING_SIZE */ + data = (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE_UMD) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmVGT_GSVS_RING_SIZE) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); + + /* mmSPI_CONFIG_CNTL_REMAP -> mmSPI_CONFIG_CNTL */ + data = (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_REMAP) << + GRBM_CAM_DATA__CAM_ADDR__SHIFT) | + (SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL) << + GRBM_CAM_DATA__CAM_REMAPADDR__SHIFT); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA_UPPER, 0); + WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); +} + +static int gfx_v10_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gfx_v10_0_csb_vram_pin(adev); + if (r) + return r; + + if (!amdgpu_emu_mode) + gfx_v10_0_init_golden_registers(adev); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + /** + * For gfx 10, rlc firmware loading relies on smu firmware is + * loaded firstly, so in direct type, it has to load smc ucode + * here before rlc. + */ + r = smu_load_microcode(&adev->smu); + if (r) + return r; + + r = smu_check_fw_status(&adev->smu); + if (r) { + pr_err("SMC firmware status is not correct\n"); + return r; + } + } + + /* if GRBM CAM not remapped, set up the remapping */ + if (!gfx_v10_0_check_grbm_cam_remapping(adev)) + gfx_v10_0_setup_grbm_cam_remapping(adev); + + gfx_v10_0_constants_init(adev); + + r = gfx_v10_0_rlc_resume(adev); + if (r) + return r; + + /* + * init golden registers and rlc resume may override some registers, + * reconfig them here + */ + gfx_v10_0_tcp_harvest(adev); + + r = gfx_v10_0_cp_resume(adev); + if (r) + return r; + + return r; +} + +#ifndef BRING_UP_DEBUG +static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + int i; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * + adev->gfx.num_gfx_rings)) + return -ENOMEM; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], + PREEMPT_QUEUES, 0, 0); + + return amdgpu_ring_test_ring(kiq_ring); +} +#endif + +static int gfx_v10_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); +#ifndef BRING_UP_DEBUG + if (amdgpu_async_gfx_ring) { + r = gfx_v10_0_kiq_disable_kgq(adev); + if (r) + DRM_ERROR("KGQ disable failed\n"); + } +#endif + if (amdgpu_gfx_disable_kcq(adev)) + DRM_ERROR("KCQ disable failed\n"); + if (amdgpu_sriov_vf(adev)) { + pr_debug("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + gfx_v10_0_cp_enable(adev, false); + gfx_v10_0_enable_gui_idle_interrupt(adev, false); + gfx_v10_0_csb_vram_unpin(adev); + + return 0; +} + +static int gfx_v10_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->in_suspend = true; + return gfx_v10_0_hw_fini(adev); +} + +static int gfx_v10_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = gfx_v10_0_hw_init(adev); + adev->in_suspend = false; + return r; +} + +static bool gfx_v10_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), + GRBM_STATUS, GUI_ACTIVE)) + return false; + else + return true; +} + +static int gfx_v10_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS) & + GRBM_STATUS__GUI_ACTIVE_MASK; + + if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v10_0_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | + GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | + GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK + | GRBM_STATUS__BCI_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, + 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, + 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, + 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, + 1); + + if (grbm_soft_reset) { + /* stop the rlc */ + gfx_v10_0_rlc_stop(adev); + + /* Disable GFX parsing/prefetching */ + gfx_v10_0_cp_gfx_enable(adev, false); + + /* Disable MEC parsing/prefetching */ + gfx_v10_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset) { + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + } + + /* Wait a little for things to settle down */ + udelay(50); + } + return 0; +} + +static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + return clock; +} + +static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + struct amdgpu_device *adev = ring->adev; + + /* GDS Base */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, + gds_base); + + /* GDS Size */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, + gds_size); + + /* GWS */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v10_0_write_data_to_reg(ring, 0, false, + SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static int gfx_v10_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + + gfx_v10_0_set_kiq_pm4_funcs(adev); + gfx_v10_0_set_ring_funcs(adev); + gfx_v10_0_set_irq_funcs(adev); + gfx_v10_0_set_gds_init(adev); + gfx_v10_0_set_rlc_funcs(adev); + + return 0; +} + +static int gfx_v10_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + uint32_t rlc_cntl; + + /* if RLC is not enabled, do nothing */ + rlc_cntl = RREG32_SOC15(GC, 0, mmRLC_CNTL); + return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; +} + +static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } +} + +static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); +} + +static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + /* only for Vega10 & Raven1 */ + data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; + + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* 2 - disable MGLS in RLC */ + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); + } + + /* 3 - disable MGLS in CP */ + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); + } + } +} + +static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* Enable 3D CGCG/CGLS */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + /* write cmd to clear cgcg/cgls ov */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + /* enable 3Dcgcg FSM(0x0000363f) */ + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); + } else { + /* Disable CGCG/CGLS */ + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + /* disable cgcg, cgls should be disabled */ + data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); + } +} + +static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + /* enable cgcg FSM(0x0000363F) */ + def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); + } +} + +static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v10_0_update_3d_clock_gating(adev, enable); + /* === CGCG + CGLS === */ + gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v10_0_update_3d_clock_gating(adev, enable); + /* === MGCG + MGLS === */ + gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + } + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS)) + gfx_v10_0_enable_gui_idle_interrupt(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); + + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { + .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, + .set_safe_mode = gfx_v10_0_set_safe_mode, + .unset_safe_mode = gfx_v10_0_unset_safe_mode, + .init = gfx_v10_0_rlc_init, + .get_csb_size = gfx_v10_0_get_csb_size, + .get_csb_buffer = gfx_v10_0_get_csb_buffer, + .resume = gfx_v10_0_rlc_resume, + .stop = gfx_v10_0_rlc_stop, + .reset = gfx_v10_0_rlc_reset, + .start = gfx_v10_0_rlc_start +}; + +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + switch (adev->asic_type) { + case CHIP_NAVI10: + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } else + amdgpu_gfx_off_ctrl(adev, true); + break; + default: + break; + } + return 0; +} + +static int gfx_v10_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_NAVI10: + gfx_v10_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +} + +static u64 gfx_v10_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 is 32bit rptr*/ +} + +static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); + } else { + wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); + wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; + } + + return wptr; +} + +static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + } +} + +static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx10 hardware is 32bit rptr */ +} + +static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx10 now */ + } +} + +static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask, reg_mem_engine; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v10_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 header, control = 0; + + /* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS. + * This resets the wave ID counters. (needed by transform feedback) + * TODO: This might only be needed on a VMID switch when we change + * the GDS OA mapping, not sure. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id); + + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | (vmid << 24); + + if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + control |= INDIRECT_BUFFER_PRE_ENB(1); + + if (flags & AMDGPU_IB_PREEMPTED) + control |= INDIRECT_BUFFER_PRE_RESUME(1); + + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v10_0_ring_emit_de_meta(ring, + flags & AMDGPU_IB_PREEMPTED ? true : false); + } + + amdgpu_ring_write(ring, header); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + + /* Currently, there is a high possibility to get wave ID mismatch + * between ME and GDS, leading to a hw deadlock, because ME generates + * different wave IDs than the GDS expects. This situation happens + * randomly when at least 5 compute pipes use GDS ordered append. + * The wave IDs generated by ME are also wrong after suspend/resume. + * Those are probably bugs somewhere else in the kernel driver. + * + * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and + * GDS to 0 for this ring (me/pipe). + */ + if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); + amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ + if (adev->pdev->device == 0x50) + int_sel = false; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ + PACKET3_RELEASE_MEM_GCR_GLM_WB | + PACKET3_RELEASE_MEM_CACHE_POLICY(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v10_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), + upper_32_bits(addr), seq, 0xffffffff, 4); +} + +static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* compute doesn't have PFP */ + if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + } +} + +static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + struct amdgpu_device *adev = ring->adev; + + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + if (amdgpu_mcbp) + gfx_v10_0_ring_emit_ce_meta(ring, + flags & AMDGPU_IB_PREEMPTED ? true : false); + + gfx_v10_0_ring_emit_tmz(ring, true); + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (AMDGPU_PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + +static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + + return ret; +} + +static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +{ + unsigned cur; + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *kiq_ring = &kiq->ring; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) + return -ENOMEM; + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* assert IB preemption, emit the trailing fence */ + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, + ring->trail_fence_gpu_addr, + ++ring->trail_seq); + amdgpu_ring_commit(kiq_ring); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + DRM_UDELAY(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); + } + + /* deassert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_ce_ib_state ce_payload = {0}; + uint64_t csa_addr; + int cnt; + + cnt = (sizeof(ce_payload) >> 2) + 4 - 2; + csa_addr = amdgpu_csa_vaddr(ring->adev); + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, ce_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, ce_payload))); + + if (resume) + amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + + offsetof(struct v10_gfx_meta_data, + ce_payload), + sizeof(ce_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, + sizeof(ce_payload) >> 2); +} + +static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +{ + struct amdgpu_device *adev = ring->adev; + struct v10_de_ib_state de_payload = {0}; + uint64_t csa_addr, gds_addr; + int cnt; + + csa_addr = amdgpu_csa_vaddr(ring->adev); + gds_addr = ALIGN(csa_addr + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + + cnt = (sizeof(de_payload) >> 2) + 4 - 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, de_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + + offsetof(struct v10_gfx_meta_data, de_payload))); + + if (resume) + amdgpu_ring_write_multiple(ring, adev->virt.csa_cpu_addr + + offsetof(struct v10_gfx_meta_data, + de_payload), + sizeof(de_payload) >> 2); + else + amdgpu_ring_write_multiple(ring, (void *)&de_payload, + sizeof(de_payload) >> 2); +} + +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ +} + +static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); +} + +static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + uint32_t cmd = 0; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; + break; + case AMDGPU_RING_TYPE_KIQ: + cmd = (1 << 16); /* no inc addr */ + break; + default: + cmd = WR_CONFIRM; + break; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, cmd); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); +} + +static void +gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + uint32_t me, uint32_t pipe, + enum amdgpu_interrupt_state state) +{ + uint32_t cp_int_cntl, cp_int_cntl_reg; + + if (!me) { + switch (pipe) { + case 0: + cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0); + break; + case 1: + cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING1); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 0); + WREG32(cp_int_cntl_reg, cp_int_cntl); + case AMDGPU_IRQ_STATE_ENABLE: + cp_int_cntl = RREG32(cp_int_cntl_reg); + cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, 1); + WREG32(cp_int_cntl_reg, cp_int_cntl); + break; + default: + break; + } +} + +static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: + gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); + break; + case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: + gfx_v10_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v10_0_set_compute_eop_interrupt_state(adev, 2, 3, state); + break; + default: + break; + } + return 0; +} + +static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + if (pipe_id == 0) + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + else + amdgpu_fence_process(&adev->gfx.gfx_ring[1]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. + */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + default: + break; + } + + return 0; +} + +static void gfx_v10_0_handle_priv_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + /* we only enabled 1 gfx queue per pipe for now */ + if (ring->me == me_id && ring->pipe == pipe_id) + drm_sched_fault(&ring->sched); + } + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + default: + BUG(); + } +} + +static int gfx_v10_0_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v10_0_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + gfx_v10_0_handle_priv_fault(adev, entry); + return 0; +} + +static int gfx_v10_0_kiq_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + uint32_t tmp, target; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + + if (ring->me == 1) + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + else + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); + target += ring->pipe; + + switch (type) { + case AMDGPU_CP_KIQ_IRQ_DRIVER0: + if (state == AMDGPU_IRQ_STATE_DISABLE) { + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(target, tmp); + } else { + tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(target, tmp); + } + break; + default: + BUG(); /* kiq only support GENERIC2_INT now */ + break; + } + return 0; +} + +static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", + me_id, pipe_id, queue_id); + + amdgpu_fence_process(ring); + return 0; +} + +static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { + .name = "gfx_v10_0", + .early_init = gfx_v10_0_early_init, + .late_init = gfx_v10_0_late_init, + .sw_init = gfx_v10_0_sw_init, + .sw_fini = gfx_v10_0_sw_fini, + .hw_init = gfx_v10_0_hw_init, + .hw_fini = gfx_v10_0_hw_fini, + .suspend = gfx_v10_0_suspend, + .resume = gfx_v10_0_resume, + .is_idle = gfx_v10_0_is_idle, + .wait_for_idle = gfx_v10_0_wait_for_idle, + .soft_reset = gfx_v10_0_soft_reset, + .set_clockgating_state = gfx_v10_0_set_clockgating_state, + .set_powergating_state = gfx_v10_0_set_powergating_state, + .get_clockgating_state = gfx_v10_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, + .get_rptr = gfx_v10_0_ring_get_rptr_gfx, + .get_wptr = gfx_v10_0_ring_get_wptr_gfx, + .set_wptr = gfx_v10_0_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + * the first COND_EXEC jump to the place + * just prior to this double SWITCH_BUFFER + */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ + .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v10_0_ring_emit_ib_gfx, + .emit_fence = gfx_v10_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v10_0_ring_emit_sb, + .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, + .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, + .preempt_ib = gfx_v10_0_ring_preempt_ib, + .emit_tmz = gfx_v10_0_ring_emit_tmz, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, + .get_rptr = gfx_v10_0_ring_get_rptr_compute, + .get_wptr = gfx_v10_0_ring_get_wptr_compute, + .set_wptr = gfx_v10_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v10_0_ring_emit_gds_switch */ + 7 + /* gfx_v10_0_ring_emit_hdp_flush */ + 5 + /* hdp invalidate */ + 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v10_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ + .emit_ib = gfx_v10_0_ring_emit_ib_compute, + .emit_fence = gfx_v10_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v10_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v10_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, +}; + +static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, + .get_rptr = gfx_v10_0_ring_get_rptr_compute, + .get_wptr = gfx_v10_0_ring_get_wptr_compute, + .set_wptr = gfx_v10_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v10_0_ring_emit_gds_switch */ + 7 + /* gfx_v10_0_ring_emit_hdp_flush */ + 5 + /*hdp invalidate */ + 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + + 2 + /* gfx_v10_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ + .emit_ib = gfx_v10_0_ring_emit_ib_compute, + .emit_fence = gfx_v10_0_ring_emit_fence_kiq, + .test_ring = gfx_v10_0_ring_test_ring, + .test_ib = gfx_v10_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v10_0_ring_emit_rreg, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, +}; + +static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + adev->gfx.kiq.ring.funcs = &gfx_v10_0_ring_funcs_kiq; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v10_0_ring_funcs_gfx; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v10_0_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v10_0_eop_irq_funcs = { + .set = gfx_v10_0_set_eop_interrupt_state, + .process = gfx_v10_0_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_reg_irq_funcs = { + .set = gfx_v10_0_set_priv_reg_fault_state, + .process = gfx_v10_0_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v10_0_priv_inst_irq_funcs = { + .set = gfx_v10_0_set_priv_inst_fault_state, + .process = gfx_v10_0_priv_inst_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = { + .set = gfx_v10_0_kiq_set_interrupt_state, + .process = gfx_v10_0_kiq_irq, +}; + +static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v10_0_eop_irq_funcs; + + adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; + adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v10_0_priv_inst_irq_funcs; +} + +static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; + break; + default: + break; + } +} + +static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) +{ + /* init asic gds info */ + switch (adev->asic_type) { + case CHIP_NAVI10: + default: + adev->gds.gds_size = 0x10000; + adev->gds.gds_compute_max_wave_id = 0x4ff; + adev->gds.vgt_gs_max_wave_id = 0x3ff; + break; + } + + adev->gds.gws_size = 64; + adev->gds.oa_size = 16; +} + +static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + + WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); +} + +static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 data, wgp_bitmask; + data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; + + wgp_bitmask = + amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); + + return (~data) & wgp_bitmask; +} + +static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) +{ + u32 wgp_idx, wgp_active_bitmap; + u32 cu_bitmap_per_wgp, cu_active_bitmap; + + wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); + cu_active_bitmap = 0; + + for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { + /* if there is one WGP enabled, it means 2 CUs will be enabled */ + cu_bitmap_per_wgp = 3 << (2 * wgp_idx); + if (wgp_active_bitmap & (1 << wgp_idx)) + cu_active_bitmap |= cu_bitmap_per_wgp; + } + + return cu_active_bitmap; +} + +static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + unsigned disable_masks[4 * 2]; + + if (!adev || !cu_info) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); + if (i < 4 && j < 2) + gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( + adev, disable_masks[i * 2 + j]); + bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); + cu_info->bitmap[i][j] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; + } + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + } + } + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &gfx_v10_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vi_dpm.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h index c43e03fddfba..b442e50324d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2019 dvanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,12 +21,9 @@ * */ -#ifndef __VI_DPM_H__ -#define __VI_DPM_H__ +#ifndef __GFX_V10_0_H__ +#define __GFX_V10_0_H__ -extern const struct amd_ip_funcs cz_dpm_ip_funcs; -int cz_smu_init(struct amdgpu_device *adev); -int cz_smu_start(struct amdgpu_device *adev); -int cz_smu_fini(struct amdgpu_device *adev); +extern const struct amdgpu_ip_block_version gfx_v10_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 91f10995249b..7f0a63628c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3043,7 +3043,7 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { DRM_INFO("Not implemented\n"); } @@ -3115,7 +3115,7 @@ static int gfx_v6_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -3350,7 +3350,7 @@ static int gfx_v6_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v6_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 003bb5769183..0db9f488da7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4169,9 +4169,9 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - cik_srbm_select(adev, me, pipe, q, 0); + cik_srbm_select(adev, me, pipe, q, vm); } static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { @@ -4462,7 +4462,7 @@ static int gfx_v7_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -4799,7 +4799,7 @@ static int gfx_v7_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v7_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b7a2df46dc22..5f401b41ef7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2009,7 +2009,7 @@ static int gfx_v8_0_sw_init(void *handle) } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - AMDGPU_CP_IRQ_GFX_EOP); + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -2046,7 +2046,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); if (r) return r; @@ -2069,7 +2069,7 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); @@ -3436,9 +3436,9 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, } static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - vi_srbm_select(adev, me, pipe, q, 0); + vi_srbm_select(adev, me, pipe, q, vm); } static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) @@ -3925,11 +3925,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) int list_size; unsigned int *register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + kmemdup(adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); if (!register_list_format) return -ENOMEM; - memcpy(register_list_format, adev->gfx.rlc.register_list_format, - adev->gfx.rlc.reg_list_format_size_bytes); gfx_v8_0_parse_ind_reg_list(register_list_format, RLC_FormatDirectRegListLength, @@ -6217,7 +6216,7 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, struct amdgpu_ring *iring; mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); if (acquire) set_bit(pipe, adev->gfx.pipe_reserve_bitmap); else @@ -6236,20 +6235,20 @@ static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, /* Lower all pipes without a current reservation */ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v8_0_ring_set_pipe_percent(iring, reserve); } for (i = 0; i < adev->gfx.num_compute_rings; ++i) { iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v8_0_ring_set_pipe_percent(iring, reserve); } @@ -6537,7 +6536,7 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b4b85e550bc2..f4c4eea62526 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -309,6 +309,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { @@ -1312,9 +1313,9 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q) + u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, 0); + soc15_grbm_select(adev, me, pipe, q, vm); } static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { @@ -1724,7 +1725,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); if (r) return r; } @@ -1760,7 +1761,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); if (r) return r; @@ -1802,7 +1803,7 @@ static int gfx_v9_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); @@ -1941,11 +1942,15 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!amdgpu_noretry); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, + !!amdgpu_noretry); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, (adev->gmc.private_aperture_start >> 48)); @@ -1959,25 +1964,6 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); - - mutex_lock(&adev->grbm_idx_mutex); - /* - * making sure that the following register writes will be broadcasted - * to all the shaders - */ - gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - - WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, - (adev->gfx.config.sc_prim_fifo_size_frontend << - PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_prim_fifo_size_backend << - PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_hiz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | - (adev->gfx.config.sc_earlyz_tile_fifo_size << - PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); - mutex_unlock(&adev->grbm_idx_mutex); - } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2092,11 +2078,10 @@ static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) u32 tmp = 0; u32 *register_list_format = - kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + kmemdup(adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); if (!register_list_format) return -ENOMEM; - memcpy(register_list_format, adev->gfx.rlc.register_list_format, - adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); @@ -3600,45 +3585,89 @@ static const struct soc15_reg_entry sgpr_init_regs[] = { }; static const struct soc15_reg_entry sec_ded_counter_registers[] = { - { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, - { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, - { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, - { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, - { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, - { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, - { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, + { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, + { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, + { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, + { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, + { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, }; +static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; + int i, r; + + r = amdgpu_ring_alloc(ring, 7); + if (r) { + DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", + ring->name, r); + return r; + } + + WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | + PACKET3_DMA_DATA_SRC_SEL(2) | + PACKET3_DMA_DATA_ENGINE(0))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gds.gds_size); + + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); + + return r; +} + static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; struct amdgpu_ib ib; struct dma_fence *f = NULL; - int r, i, j; + int r, i, j, k; unsigned total_size, vgpr_offset, sgpr_offset; u64 gpu_addr; @@ -3750,19 +3779,13 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* read back registers to clear the counters */ mutex_lock(&adev->grbm_idx_mutex); - for (j = 0; j < 16; j++) { - gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); - gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); - for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) - RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { + for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { + for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { + gfx_v9_0_select_se_sh(adev, j, 0x0, k); + RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); + } + } } WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); mutex_unlock(&adev->grbm_idx_mutex); @@ -3815,6 +3838,10 @@ static int gfx_v9_0_ecc_late_init(void *handle) return 0; } + r = gfx_v9_0_do_edc_gds_workarounds(adev); + if (r) + return r; + /* requires IBs so do in late init after IB pool is initialized */ r = gfx_v9_0_do_edc_gpr_workarounds(adev); if (r) @@ -3864,9 +3891,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -3881,7 +3906,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); @@ -4542,7 +4566,7 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, struct amdgpu_ring *iring; mutex_lock(&adev->gfx.pipe_reserve_mutex); - pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); if (acquire) set_bit(pipe, adev->gfx.pipe_reserve_bitmap); else @@ -4561,20 +4585,20 @@ static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, /* Lower all pipes without a current reservation */ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { iring = &adev->gfx.gfx_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } for (i = 0; i < adev->gfx.num_compute_rings; ++i) { iring = &adev->gfx.compute_ring[i]; - pipe = amdgpu_gfx_queue_to_bit(adev, - iring->me, - iring->pipe, - 0); + pipe = amdgpu_gfx_mec_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); gfx_v9_0_ring_set_pipe_percent(iring, reserve); } @@ -4989,7 +5013,7 @@ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { switch (type) { - case AMDGPU_CP_IRQ_GFX_EOP: + case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); break; case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9f0f189fc111..15986748f59f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -236,7 +236,8 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c new file mode 100644 index 000000000000..d605b4963f8a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -0,0 +1,354 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "gfxhub_v2_0.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "gc/gc_10_1_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, mmGCMC_VM_FB_OFFSET) << 24; +} + +static void gfxhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +{ + uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void gfxhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + gfxhub_v2_0_init_gart_pt_regs(adev); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + /* Disable AGP. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + + +static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp); + + tmp = mmGCVM_L2_CNTL3_DEFAULT; + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp); + + tmp = mmGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL4, tmp); +} + +static void gfxhub_v2_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32_SOC15(GC, 0, mmGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. */ + gfxhub_v2_0_init_gart_aperture_regs(adev); + gfxhub_v2_0_init_system_aperture_regs(adev); + gfxhub_v2_0_init_tlb_regs(adev); + gfxhub_v2_0_init_cache_regs(adev); + + gfxhub_v2_0_enable_system_domain(adev); + gfxhub_v2_0_disable_identity_aperture(adev); + gfxhub_v2_0_setup_vmid_config(adev); + gfxhub_v2_0_program_invalidation(adev); + + return 0; +} + +void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v2_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +void gfxhub_v2_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h new file mode 100644 index 000000000000..06807940748b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V2_0_H__ +#define __GFXHUB_V2_0_H__ + +u64 gfxhub_v2_0_get_fb_location(struct amdgpu_device *adev); +int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev); +void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev); +void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void gfxhub_v2_0_init(struct amdgpu_device *adev); +u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c new file mode 100644 index 000000000000..5eeb72fcc123 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -0,0 +1,918 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <linux/pci.h> +#include "amdgpu.h" +#include "amdgpu_atomfirmware.h" +#include "gmc_v10_0.h" + +#include "hdp/hdp_5_0_0_offset.h" +#include "hdp/hdp_5_0_0_sh_mask.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "dcn/dcn_2_0_0_offset.h" +#include "dcn/dcn_2_0_0_sh_mask.h" +#include "oss/osssys_5_0_0_offset.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +#include "navi10_enum.h" + +#include "soc15.h" +#include "soc15_common.h" + +#include "nbio_v2_3.h" + +#include "gfxhub_v2_0.h" +#include "mmhub_v2_0.h" +#include "athub_v2_0.h" +/* XXX Move this macro to navi10 header file, which is like vid.h for VI.*/ +#define AMDGPU_NUM_OF_VMIDS 8 + +#if 0 +static const struct soc15_reg_golden golden_settings_navi10_hdp[] = +{ + /* TODO add golden setting for hdp */ +}; +#endif + +static int +gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, unsigned type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vmhub *hub; + u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; + + bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits[AMDGPU_MMHUB]; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits[AMDGPU_GFXHUB]; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits[AMDGPU_MMHUB]; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits[AMDGPU_GFXHUB]; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + uint32_t status = 0; + u64 addr; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (!amdgpu_sriov_vf(adev)) { + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + } + + if (printk_ratelimit()) { + dev_err(adev->dev, + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", + entry->vmid_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + dev_err(adev->dev, " at page 0x%016llx from %d\n", + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) + dev_err(adev->dev, + "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = { + .set = gmc_v10_0_vm_fault_interrupt_state, + .process = gmc_v10_0_process_interrupt, +}; + +static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gmc.vm_fault.num_types = 1; + adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs; +} + +static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ + +static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + unsigned int vmhub, uint32_t flush_type) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned int i; + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* Wait for ACK with a delay.*/ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + + udelay(1); + } + + if (i < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); +} + +/** + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * + * Flush the TLB for the requested page table. + */ +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid, uint32_t flush_type) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct dma_fence *fence; + struct amdgpu_job *job; + + int r; + + /* flush hdp cache */ + adev->nbio_funcs->hdp_flush(adev, NULL); + + mutex_lock(&adev->mman.gtt_window_lock); + + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); + if (!adev->mman.buffer_funcs_enabled || + !adev->ib_pool_ready || + adev->in_gpu_reset) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + /* The SDMA on Navi has a bug which can theoretically result in memory + * corruption if an invalidation happens at the same time as an VA + * translation. Avoid this by doing the invalidation from the SDMA + * itself. + */ + r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job); + if (r) + goto error_alloc; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); + job->vm_needs_flush = true; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_submit; + + mutex_unlock(&adev->mman.gtt_window_lock); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + + return; + +error_submit: + amdgpu_job_free(job); + +error_alloc: + mutex_unlock(&adev->mman.gtt_window_lock); + DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); +} + +static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); + unsigned eng = ring->vm_inv_eng; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + upper_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + + /* wait for the invalidate to complete */ + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, + 1 << vmid, 1 << vmid); + + return pd_addr; +} + +static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB) + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; + else + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; + + amdgpu_ring_emit_wreg(ring, reg, pasid); +} + +/* + * PTE format on NAVI 10: + * 63:59 reserved + * 58:57 reserved + * 56 F + * 55 L + * 54 reserved + * 53:52 SW + * 51 T + * 50:48 mtype + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format on NAVI 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ +static uint64_t gmc_v10_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + + switch (flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); + break; + default: + pte_flag |= AMDGPU_PTE_MTYPE_NV10(MTYPE_NC); + break; + } + + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + +static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) +{ + if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->gmc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + if (!adev->gmc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE)) + *flags |= AMDGPU_PDE_BFS(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE) + *flags &= ~AMDGPU_PDE_PTE; + else + *flags |= AMDGPU_PTE_TF; + } +} + +static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb, + .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb, + .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping, + .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags, + .get_vm_pde = gmc_v10_0_get_vm_pde +}; + +static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) +{ + if (adev->gmc.gmc_funcs == NULL) + adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs; +} + +static int gmc_v10_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v10_0_set_gmc_funcs(adev); + gmc_v10_0_set_irq_funcs(adev); + + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; + adev->gmc.shared_aperture_end = + adev->gmc.shared_aperture_start + (4ULL << 30) - 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; + adev->gmc.private_aperture_end = + adev->gmc.private_aperture_start + (4ULL << 30) - 1; + + return 0; +} + +static int gmc_v10_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; + unsigned i; + + for(i = 0; i < adev->num_rings; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + unsigned vmhub = ring->funcs->vmhub; + + ring->vm_inv_eng = vm_inv_eng[vmhub]++; + dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", + ring->idx, ring->name, ring->vm_inv_eng, + ring->funcs->vmhub); + } + + /* Engine 17 is used for GART flushes */ + for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) + BUG_ON(vm_inv_eng[i] > 17); + + return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); +} + +static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, + struct amdgpu_gmc *mc) +{ + u64 base = 0; + + if (!amdgpu_sriov_vf(adev)) + base = gfxhub_v2_0_get_fb_location(adev); + + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); + + /* base offset of vram pages */ + adev->vm_manager.vram_base_offset = gfxhub_v2_0_get_mc_fb_offset(adev); +} + +/** + * gmc_v10_0_mc_init - initialize the memory controller driver params + * + * @adev: amdgpu_device pointer + * + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space. + * Returns 0 for success. + */ +static int gmc_v10_0_mc_init(struct amdgpu_device *adev) +{ + int chansize, numchan; + + if (!amdgpu_emu_mode) + adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + else { + /* hard code vram_width for emulation */ + chansize = 128; + numchan = 1; + adev->gmc.vram_width = numchan * chansize; + } + + /* Could aper size report 0 ? */ + adev->gmc.aper_base = pci_resource_start(adev->pdev, 0); + adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); + + /* size in MB on si */ + adev->gmc.mc_vram_size = + adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; + adev->gmc.visible_vram_size = adev->gmc.aper_size; + + /* In case the PCI BAR is larger than the actual amount of vram */ + if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) + adev->gmc.visible_vram_size = adev->gmc.real_vram_size; + + /* set the gart size */ + if (amdgpu_gart_size == -1) { + switch (adev->asic_type) { + case CHIP_NAVI10: + default: + adev->gmc.gart_size = 512ULL << 20; + break; + } + } else + adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; + + gmc_v10_0_vram_gtt_location(adev, &adev->gmc); + + return 0; +} + +static int gmc_v10_0_gart_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->gart.bo) { + WARN(1, "NAVI10 PCIE GART already initialized\n"); + return 0; + } + + /* Initialize common gart structure */ + r = amdgpu_gart_init(adev); + if (r) + return r; + + adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) | + AMDGPU_PTE_EXECUTABLE; + + return amdgpu_gart_table_vram_alloc(adev); +} + +static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport; + u32 pitch; + + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); + pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) { + DRM_ERROR("Warning: pre-OS buffer uses most of vram, \ + be aware of gart table overwrite\n"); + return 0; + } + + return size; +} + + + +static int gmc_v10_0_sw_init(void *handle) +{ + int r; + int dma_bits; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfxhub_v2_0_init(adev); + mmhub_v2_0_init(adev); + + spin_lock_init(&adev->gmc.invalidate_lock); + + adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); + switch (adev->asic_type) { + case CHIP_NAVI10: + /* + * To fulfill 4-level page support, + * vm size is 256TB (48bit), maximum size of Navi10, + * block size 512 (9bit) + */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; + default: + break; + } + + /* This interrupt is VMC page fault.*/ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, + VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, + UTCL2_1_0__SRCID__FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + + /* + * Set the internal MC address mask This is the max address of the GPU's + * internal address space. + */ + adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + + /* + * Reserve 8M stolen memory for navi10 like vega10 + * TODO: will check if it's really needed on asic. + */ + if (amdgpu_emu_mode == 1) + adev->gmc.stolen_size = 0; + else + adev->gmc.stolen_size = 9 * 1024 *1024; + + /* + * Set DMA mask + need_dma32 flags. + * PCIE - can handle 44-bits. + * IGP - can handle 44-bits + * PCI - dma32 for legacy pci gart, 44 bits on navi10 + */ + adev->need_dma32 = false; + dma_bits = adev->need_dma32 ? 32 : 44; + + r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + adev->need_dma32 = true; + dma_bits = 32; + printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + } + + r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); + printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + } + + r = gmc_v10_0_mc_init(adev); + if (r) + return r; + + adev->gmc.stolen_size = gmc_v10_0_get_vbios_fb_size(adev); + + /* Memory manager */ + r = amdgpu_bo_init(adev); + if (r) + return r; + + r = gmc_v10_0_gart_init(adev); + if (r) + return r; + + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + + amdgpu_vm_manager_init(adev); + + return 0; +} + +/** + * gmc_v8_0_gart_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tears down the driver GART/VM setup (CIK). + */ +static void gmc_v10_0_gart_fini(struct amdgpu_device *adev) +{ + amdgpu_gart_table_vram_free(adev); + amdgpu_gart_fini(adev); +} + +static int gmc_v10_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_vm_manager_fini(adev); + gmc_v10_0_gart_fini(adev); + amdgpu_gem_force_release(adev); + amdgpu_bo_fini(adev); + + return 0; +} + +static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + break; + default: + break; + } +} + +/** + * gmc_v10_0_gart_enable - gart enable + * + * @adev: amdgpu_device pointer + */ +static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + u32 tmp; + + if (adev->gart.bo == NULL) { + dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + + r = amdgpu_gart_table_vram_pin(adev); + if (r) + return r; + + r = gfxhub_v2_0_gart_enable(adev); + if (r) + return r; + + r = mmhub_v2_0_gart_enable(adev); + if (r) + return r; + + tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); + tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; + WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); + + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + + /* Flush HDP after it is initialized */ + adev->nbio_funcs->hdp_flush(adev, NULL); + + value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? + false : true; + + gfxhub_v2_0_set_fault_enable_default(adev, value); + mmhub_v2_0_set_fault_enable_default(adev, value); + gmc_v10_0_flush_gpu_tlb(adev, 0, 0); + + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->gmc.gart_size >> 20), + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); + + adev->gart.ready = true; + + return 0; +} + +static int gmc_v10_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters.*/ + gmc_v10_0_init_golden_registers(adev); + + r = gmc_v10_0_gart_enable(adev); + if (r) + return r; + + return 0; +} + +/** + * gmc_v10_0_gart_disable - gart disable + * + * @adev: amdgpu_device pointer + * + * This disables all VM page table. + */ +static void gmc_v10_0_gart_disable(struct amdgpu_device *adev) +{ + gfxhub_v2_0_gart_disable(adev); + mmhub_v2_0_gart_disable(adev); + amdgpu_gart_table_vram_unpin(adev); +} + +static int gmc_v10_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) { + /* full access mode, so don't touch any GMC register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + + amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + gmc_v10_0_gart_disable(adev); + + return 0; +} + +static int gmc_v10_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v10_0_hw_fini(adev); + + return 0; +} + +static int gmc_v10_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gmc_v10_0_hw_init(adev); + if (r) + return r; + + amdgpu_vmid_reset_all(adev); + + return 0; +} + +static bool gmc_v10_0_is_idle(void *handle) +{ + /* MC is always ready in GMC v10.*/ + return true; +} + +static int gmc_v10_0_wait_for_idle(void *handle) +{ + /* There is no need to wait for MC idle in GMC v10.*/ + return 0; +} + +static int gmc_v10_0_soft_reset(void *handle) +{ + return 0; +} + +static int gmc_v10_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = mmhub_v2_0_set_clockgating(adev, state); + if (r) + return r; + + return athub_v2_0_set_clockgating(adev, state); +} + +static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mmhub_v2_0_get_clockgating(adev, flags); + + athub_v2_0_get_clockgating(adev, flags); +} + +static int gmc_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gmc_v10_0_ip_funcs = { + .name = "gmc_v10_0", + .early_init = gmc_v10_0_early_init, + .late_init = gmc_v10_0_late_init, + .sw_init = gmc_v10_0_sw_init, + .sw_fini = gmc_v10_0_sw_fini, + .hw_init = gmc_v10_0_hw_init, + .hw_fini = gmc_v10_0_hw_fini, + .suspend = gmc_v10_0_suspend, + .resume = gmc_v10_0_resume, + .is_idle = gmc_v10_0_is_idle, + .wait_for_idle = gmc_v10_0_wait_for_idle, + .soft_reset = gmc_v10_0_soft_reset, + .set_clockgating_state = gmc_v10_0_set_clockgating_state, + .set_powergating_state = gmc_v10_0_set_powergating_state, + .get_clockgating_state = gmc_v10_0_get_clockgating_state, +}; + +const struct amdgpu_ip_block_version gmc_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &gmc_v10_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h new file mode 100644 index 000000000000..7daa53d8996c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V10_0_H__ +#define __GMC_V10_0_H__ + +extern const struct amd_ip_funcs gmc_v10_0_ip_funcs; +extern const struct amdgpu_ip_block_version gmc_v10_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8e3f5990e278..73f3b79ab131 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -535,22 +535,22 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, switch (flags & AMDGPU_VM_MTYPE_MASK) { case AMDGPU_VM_MTYPE_DEFAULT: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); break; case AMDGPU_VM_MTYPE_NC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); break; case AMDGPU_VM_MTYPE_WC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_WC); break; case AMDGPU_VM_MTYPE_CC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); break; case AMDGPU_VM_MTYPE_UC: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_UC); break; default: - pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + pte_flag |= AMDGPU_PTE_MTYPE_VG10(MTYPE_NC); break; } @@ -733,9 +733,7 @@ static int gmc_v9_0_ecc_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -750,7 +748,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); @@ -919,7 +916,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; - adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) | + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c new file mode 100644 index 000000000000..29fab7984855 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -0,0 +1,366 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "nv.h" +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/navi10_mes.bin"); + +static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, + struct mes_add_queue_input *input) +{ + return 0; +} + +static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, + struct mes_remove_queue_input *input) +{ + return 0; +} + +static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, + struct mes_suspend_gang_input *input) +{ + return 0; +} + +static int mes_v10_1_resume_gang(struct amdgpu_mes *mes, + struct mes_resume_gang_input *input) +{ + return 0; +} + +static const struct amdgpu_mes_funcs mes_v10_1_funcs = { + .add_hw_queue = mes_v10_1_add_hw_queue, + .remove_hw_queue = mes_v10_1_remove_hw_queue, + .suspend_gang = mes_v10_1_suspend_gang, + .resume_gang = mes_v10_1_resume_gang, +}; + +static int mes_v10_1_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + const struct mes_firmware_header_v1_0 *mes_hdr; + + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin", chip_name); + err = request_firmware(&adev->mes.fw, fw_name, adev->dev); + if (err) + return err; + + err = amdgpu_ucode_validate(adev->mes.fw); + if (err) { + release_firmware(adev->mes.fw); + adev->mes.fw = NULL; + return err; + } + + mes_hdr = (const struct mes_firmware_header_v1_0 *)adev->mes.fw->data; + adev->mes.ucode_fw_version = le32_to_cpu(mes_hdr->mes_ucode_version); + adev->mes.ucode_fw_version = + le32_to_cpu(mes_hdr->mes_ucode_data_version); + adev->mes.uc_start_addr = + le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) | + ((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); + adev->mes.data_start_addr = + le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | + ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + + return 0; +} + +static void mes_v10_1_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->mes.fw); + adev->mes.fw = NULL; +} + +static int mes_v10_1_allocate_ucode_buffer(struct amdgpu_device *adev) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw->data; + + fw_data = (const __le32 *)(adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.ucode_fw_obj, + &adev->mes.ucode_fw_gpu_addr, + (void **)&adev->mes.ucode_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes fw bo\n", r); + return r; + } + + memcpy(adev->mes.ucode_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.ucode_fw_obj); + amdgpu_bo_unreserve(adev->mes.ucode_fw_obj); + + return 0; +} + +static int mes_v10_1_allocate_ucode_data_buffer(struct amdgpu_device *adev) +{ + int r; + const struct mes_firmware_header_v1_0 *mes_hdr; + const __le32 *fw_data; + unsigned fw_size; + + mes_hdr = (const struct mes_firmware_header_v1_0 *) + adev->mes.fw->data; + + fw_data = (const __le32 *)(adev->mes.fw->data + + le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.data_fw_obj, + &adev->mes.data_fw_gpu_addr, + (void **)&adev->mes.data_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes data fw bo\n", r); + return r; + } + + memcpy(adev->mes.data_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->mes.data_fw_obj); + amdgpu_bo_unreserve(adev->mes.data_fw_obj); + + return 0; +} + +static void mes_v10_1_free_ucode_buffers(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->mes.data_fw_obj, + &adev->mes.data_fw_gpu_addr, + (void **)&adev->mes.data_fw_ptr); + + amdgpu_bo_free_kernel(&adev->mes.ucode_fw_obj, + &adev->mes.ucode_fw_gpu_addr, + (void **)&adev->mes.ucode_fw_ptr); +} + +static void mes_v10_1_enable(struct amdgpu_device *adev, bool enable) +{ + uint32_t data = 0; + + if (enable) { + data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + + /* set ucode start address */ + WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, + (uint32_t)(adev->mes.uc_start_addr) >> 2); + + /* clear BYPASS_UNCACHED to avoid hangs after interrupt. */ + data = RREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_DC_OP_CNTL, + BYPASS_UNCACHED, 0); + WREG32_SOC15(GC, 0, mmCP_MES_DC_OP_CNTL, data); + + /* unhalt MES and activate pipe0 */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + } else { + data = RREG32_SOC15(GC, 0, mmCP_MES_CNTL); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, + MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); + WREG32_SOC15(GC, 0, mmCP_MES_CNTL, data); + } +} + +/* This function is for backdoor MES firmware */ +static int mes_v10_1_load_microcode(struct amdgpu_device *adev) +{ + int r; + uint32_t data; + + if (!adev->mes.fw) + return -EINVAL; + + r = mes_v10_1_allocate_ucode_buffer(adev); + if (r) + return r; + + r = mes_v10_1_allocate_ucode_data_buffer(adev); + if (r) { + mes_v10_1_free_ucode_buffers(adev); + return r; + } + + mes_v10_1_enable(adev, false); + + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_CNTL, 0); + + mutex_lock(&adev->srbm_mutex); + /* me=3, pipe=0, queue=0 */ + nv_grbm_select(adev, 3, 0, 0, 0); + + /* set ucode start address */ + WREG32_SOC15(GC, 0, mmCP_MES_PRGRM_CNTR_START, + (uint32_t)(adev->mes.uc_start_addr) >> 2); + + /* set ucode fimrware address */ + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_LO, + lower_32_bits(adev->mes.ucode_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_MES_IC_BASE_HI, + upper_32_bits(adev->mes.ucode_fw_gpu_addr)); + + /* set ucode instruction cache boundary to 2M-1 */ + WREG32_SOC15(GC, 0, mmCP_MES_MIBOUND_LO, 0x1FFFFF); + + /* set ucode data firmware address */ + WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_LO, + lower_32_bits(adev->mes.data_fw_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_MES_MDBASE_HI, + upper_32_bits(adev->mes.data_fw_gpu_addr)); + + /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF); + + /* invalidate ICACHE */ + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + + /* prime the ICACHE. */ + data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL, data); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + return 0; +} + +static int mes_v10_1_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = mes_v10_1_init_microcode(adev); + if (r) + return r; + + return 0; +} + +static int mes_v10_1_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mes_v10_1_free_microcode(adev); + + return 0; +} + +static int mes_v10_1_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = mes_v10_1_load_microcode(adev); + if (r) { + DRM_ERROR("failed to MES fw, r=%d\n", r); + return r; + } + } else { + DRM_ERROR("only support direct fw loading on MES\n"); + return -EINVAL; + } + + mes_v10_1_enable(adev, true); + + return 0; +} + +static int mes_v10_1_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mes_v10_1_enable(adev, false); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + mes_v10_1_free_ucode_buffers(adev); + + return 0; +} + +static int mes_v10_1_suspend(void *handle) +{ + return 0; +} + +static int mes_v10_1_resume(void *handle) +{ + return 0; +} + +static const struct amd_ip_funcs mes_v10_1_ip_funcs = { + .name = "mes_v10_1", + .sw_init = mes_v10_1_sw_init, + .sw_fini = mes_v10_1_sw_fini, + .hw_init = mes_v10_1_hw_init, + .hw_fini = mes_v10_1_hw_fini, + .suspend = mes_v10_1_suspend, + .resume = mes_v10_1_resume, +}; + +const struct amdgpu_ip_block_version mes_v10_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_MES, + .major = 10, + .minor = 1, + .rev = 0, + .funcs = &mes_v10_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h new file mode 100644 index 000000000000..9afd6ddb01e9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MES_V10_1_H__ +#define __MES_V10_1_H__ + +extern const struct amdgpu_ip_block_version mes_v10_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 05d1d448c8f5..dc5ce03034d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -265,7 +265,8 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 1); + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c new file mode 100644 index 000000000000..0f9549f19ade --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -0,0 +1,445 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v2_0.h" + +#include "mmhub/mmhub_2_0_0_offset.h" +#include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "mmhub/mmhub_2_0_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static void mmhub_v2_0_init_gart_pt_regs(struct amdgpu_device *adev) +{ + uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void mmhub_v2_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + mmhub_v2_0_init_gart_pt_regs(adev); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Disable AGP. */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); + + tmp = mmMMVM_L2_CNTL3_DEFAULT; + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); + + tmp = mmMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp); +} + +static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. */ + mmhub_v2_0_init_gart_aperture_regs(adev); + mmhub_v2_0_init_system_aperture_regs(adev); + mmhub_v2_0_init_tlb_regs(adev); + mmhub_v2_0_init_cache_regs(adev); + + mmhub_v2_0_enable_system_domain(adev); + mmhub_v2_0_disable_identity_aperture(adev); + mmhub_v2_0_setup_vmid_config(adev); + mmhub_v2_0_program_invalidation(adev); + + return 0; +} + +void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v2_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +void mmhub_v2_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + +} + +static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + } else { + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); +} + +static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); +} + +int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + mmhub_v2_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v2_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h new file mode 100644 index 000000000000..db16f3ece218 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V2_0_H__ +#define __MMHUB_V2_0_H__ + +int mmhub_v2_0_gart_enable(struct amdgpu_device *adev); +void mmhub_v2_0_gart_disable(struct amdgpu_device *adev); +void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v2_0_init(struct amdgpu_device *adev); +int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 31030f86be86..235548c0b41f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -451,19 +451,16 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) { - uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); - uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); - adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; - if (rlc_fw_ver >= 0x5d) - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; + /* Enable L1 security reg access mode by defaul, as non-security VF + * will no longer be supported. + */ + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; - if (sos_fw_ver >= 0x80455) - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; - if (sos_fw_ver >= 0x8045b) - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; + adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; } const struct amdgpu_virt_ops xgpu_ai_virt_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c new file mode 100644 index 000000000000..e963746be11c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -0,0 +1,486 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_5_0_0_offset.h" +#include "oss/osssys_5_0_0_sh_mask.h" + +#include "soc15_common.h" +#include "navi10_ih.h" + + +static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * navi10_ih_enable_interrupts - Enable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Enable the interrupt ring buffer (NAVI10). + */ +static void navi10_ih_enable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + adev->irq.ih.enabled = true; +} + +/** + * navi10_ih_disable_interrupts - Disable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Disable the interrupt ring buffer (NAVI10). + */ +static void navi10_ih_disable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + adev->irq.ih.enabled = false; + adev->irq.ih.rptr = 0; +} + +static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 1 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +/** + * navi10_ih_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it (NAVI). + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int navi10_ih_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih = &adev->irq.ih; + int ret = 0; + u32 ih_rb_cntl, ih_doorbell_rtpr, ih_chicken; + u32 tmp; + + /* disable irqs */ + navi10_ih_disable_interrupts(adev); + + adev->nbio_funcs->ih_control(adev); + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); + + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, + !!adev->irq.msi_enabled); + + if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) { + if (ih->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + } + } + + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + + /* set the writeback address whether it's enabled or not */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, + lower_32_bits(ih->wptr_addr)); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, + upper_32_bits(ih->wptr_addr) & 0xFFFF); + + /* set rptr, wptr to 0 */ + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); + + ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, ENABLE, 0); + } + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + + adev->nbio_funcs->ih_doorbell_range(adev, ih->use_doorbell, + ih->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + navi10_ih_enable_interrupts(adev); + + return ret; +} + +/** + * navi10_ih_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw (NAVI10). + */ +static void navi10_ih_irq_disable(struct amdgpu_device *adev) +{ + navi10_ih_disable_interrupts(adev); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * navi10_ih_get_wptr - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer (NAVI10). Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, reg, tmp; + + wptr = le32_to_cpu(*ih->wptr_cpu); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + wptr = RREG32_NO_KIQ(reg); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + tmp = RREG32_NO_KIQ(reg); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(reg, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * navi10_ih_decode_iv - decode an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Decodes the interrupt vector at the current rptr + * position and also advance the position. + */ +static void navi10_ih_decode_iv(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) +{ + /* wptr/rptr are in bytes! */ + u32 ring_index = ih->rptr >> 2; + uint32_t dw[8]; + + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); + dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); + dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); + dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); + dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); + + entry->client_id = dw[0] & 0xff; + entry->src_id = (dw[0] >> 8) & 0xff; + entry->ring_id = (dw[0] >> 16) & 0xff; + entry->vmid = (dw[0] >> 24) & 0xf; + entry->vmid_src = (dw[0] >> 31); + entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); + entry->timestamp_src = dw[2] >> 31; + entry->pasid = dw[3] & 0xffff; + entry->pasid_src = dw[3] >> 31; + entry->src_data[0] = dw[4]; + entry->src_data[1] = dw[5]; + entry->src_data[2] = dw[6]; + entry->src_data[3] = dw[7]; + + /* wptr/rptr are in bytes! */ + ih->rptr += 32; +} + +/** + * navi10_ih_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * + * Set the IH ring buffer rptr. + */ +static void navi10_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + } else + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); +} + +static int navi10_ih_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_set_interrupt_funcs(adev); + return 0; +} + +static int navi10_ih_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int navi10_ih_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); + + return 0; +} + +static int navi10_ih_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = navi10_ih_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int navi10_ih_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_irq_disable(adev); + + return 0; +} + +static int navi10_ih_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return navi10_ih_hw_fini(adev); +} + +static int navi10_ih_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return navi10_ih_hw_init(adev); +} + +static bool navi10_ih_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int navi10_ih_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int navi10_ih_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } + + return; +} + +static int navi10_ih_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + navi10_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE ? true : false); + return 0; +} + +static int navi10_ih_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs navi10_ih_ip_funcs = { + .name = "navi10_ih", + .early_init = navi10_ih_early_init, + .late_init = NULL, + .sw_init = navi10_ih_sw_init, + .sw_fini = navi10_ih_sw_fini, + .hw_init = navi10_ih_hw_init, + .hw_fini = navi10_ih_hw_fini, + .suspend = navi10_ih_suspend, + .resume = navi10_ih_resume, + .is_idle = navi10_ih_is_idle, + .wait_for_idle = navi10_ih_wait_for_idle, + .soft_reset = navi10_ih_soft_reset, + .set_clockgating_state = navi10_ih_set_clockgating_state, + .set_powergating_state = navi10_ih_set_powergating_state, + .get_clockgating_state = navi10_ih_get_clockgating_state, +}; + +static const struct amdgpu_ih_funcs navi10_ih_funcs = { + .get_wptr = navi10_ih_get_wptr, + .decode_iv = navi10_ih_decode_iv, + .set_rptr = navi10_ih_set_rptr +}; + +static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev) +{ + if (adev->irq.ih_funcs == NULL) + adev->irq.ih_funcs = &navi10_ih_funcs; +} + +const struct amdgpu_ip_block_version navi10_ih_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &navi10_ih_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.h b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h new file mode 100644 index 000000000000..140fbdaaed17 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NAVI10_IH_H__ +#define __NAVI10_IH_H__ + +extern const struct amdgpu_ip_block_version navi10_ih_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c new file mode 100644 index 000000000000..55014ce8670a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -0,0 +1,68 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi10_ip_offset.h" + +int navi10_reg_base_init(struct amdgpu_device *adev) +{ + int r, i; + + if (amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + goto legacy_init; + } + + return 0; + } + +legacy_init: + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h new file mode 100644 index 000000000000..074a9a09c0a7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h @@ -0,0 +1,4806 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NAVI10_SDMA_PKT_OPEN_H_ +#define __NAVI10_SDMA_PKT_OPEN_H_ + +#define SDMA_OP_NOP 0 +#define SDMA_OP_COPY 1 +#define SDMA_OP_WRITE 2 +#define SDMA_OP_INDIRECT 4 +#define SDMA_OP_FENCE 5 +#define SDMA_OP_TRAP 6 +#define SDMA_OP_SEM 7 +#define SDMA_OP_POLL_REGMEM 8 +#define SDMA_OP_COND_EXE 9 +#define SDMA_OP_ATOMIC 10 +#define SDMA_OP_CONST_FILL 11 +#define SDMA_OP_PTEPDE 12 +#define SDMA_OP_TIMESTAMP 13 +#define SDMA_OP_SRBM_WRITE 14 +#define SDMA_OP_PRE_EXE 15 +#define SDMA_OP_GPUVM_INV 16 +#define SDMA_OP_GCR_REQ 17 +#define SDMA_OP_DUMMY_TRAP 32 +#define SDMA_SUBOP_TIMESTAMP_SET 0 +#define SDMA_SUBOP_TIMESTAMP_GET 1 +#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2 +#define SDMA_SUBOP_COPY_LINEAR 0 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4 +#define SDMA_SUBOP_COPY_TILED 1 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6 +#define SDMA_SUBOP_COPY_SOA 3 +#define SDMA_SUBOP_COPY_DIRTY_PAGE 7 +#define SDMA_SUBOP_COPY_LINEAR_PHY 8 +#define SDMA_SUBOP_COPY_LINEAR_BC 16 +#define SDMA_SUBOP_COPY_TILED_BC 17 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND_BC 20 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND_BC 21 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND_BC 22 +#define SDMA_SUBOP_WRITE_LINEAR 0 +#define SDMA_SUBOP_WRITE_TILED 1 +#define SDMA_SUBOP_WRITE_TILED_BC 17 +#define SDMA_SUBOP_PTEPDE_GEN 0 +#define SDMA_SUBOP_PTEPDE_COPY 1 +#define SDMA_SUBOP_PTEPDE_RMW 2 +#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3 +#define SDMA_SUBOP_DATA_FILL_MULTI 1 +#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1 +#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2 +#define SDMA_SUBOP_POLL_MEM_VERIFY 3 +#define HEADER_AGENT_DISPATCH 4 +#define HEADER_BARRIER 5 +#define SDMA_OP_AQL_COPY 0 +#define SDMA_OP_AQL_BARRIER_OR 0 + +/*define for op field*/ +#define SDMA_PKT_HEADER_op_offset 0 +#define SDMA_PKT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_HEADER_op_shift 0 +#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_HEADER_sub_op_offset 0 +#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_HEADER_sub_op_shift 8 +#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift) + +/* +** Definitions for SDMA_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift) + +/*define for backwards field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift 25 +#define SDMA_PKT_COPY_LINEAR_HEADER_BACKWARDS(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_backwards_mask) << SDMA_PKT_COPY_LINEAR_HEADER_backwards_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_BC_HEADER_sub_op_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_BC_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift 22 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_offset 2 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift 30 +#define SDMA_PKT_COPY_LINEAR_BC_PARAMETER_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_mask) << SDMA_PKT_COPY_LINEAR_BC_PARAMETER_src_ha_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_BC_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift) + +/*define for all field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_l2_policy_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_l2_policy_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_mtype_shift) + +/*define for dst_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_l2_policy_shift) + +/*define for src_mtype field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask 0x00000007 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift 11 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_MTYPE(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_mtype_shift) + +/*define for src_l2_policy field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift 14 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_L2_POLICY(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_l2_policy_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift) + +/*define for dst_log field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift) + +/*define for dst1_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST1_ADDR_LO word*/ +/*define for dst1_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift) + +/*define for DST1_ADDR_HI word*/ +/*define for dst1_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift) + +/*define for DST2_ADDR_LO word*/ +/*define for dst2_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift) + +/*define for DST2_ADDR_HI word*/ +/*define for dst2_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_sw_shift) + +/*define for dst_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift 22 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_DST_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_dst_ha_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_sw_shift) + +/*define for src_ha field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift 30 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_SRC_HA(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_BC_DW_12_src_ha_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_mip_max_mask) << SDMA_PKT_COPY_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift) + +/*define for linear_cc field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift 20 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_CC(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_cc_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_cc_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12 +#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0 +#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_3_width_mask) << SDMA_PKT_COPY_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_height_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_COPY_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_x_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_6_y_mask) << SDMA_PKT_COPY_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_z_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_BC_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_BC_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_BC_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_BC_LINEAR_PITCH_linear_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_offset 11 +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_COPY_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_COPY_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_BC_COUNT_count_mask) << SDMA_PKT_COPY_TILED_BC_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift) + +/*define for videocopy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift) + +/*define for TILED_ADDR_LO_0 word*/ +/*define for tiled_addr0_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift) + +/*define for TILED_ADDR_HI_0 word*/ +/*define for tiled_addr0_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift) + +/*define for TILED_ADDR_LO_1 word*/ +/*define for tiled_addr1_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift) + +/*define for TILED_ADDR_HI_1 word*/ +/*define for tiled_addr1_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift) + +/*define for DW_5 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift) + +/*define for DW_6 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift) + +/*define for DW_7 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_mip_max_shift) + +/*define for DW_8 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift) + +/*define for DW_9 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x00001FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift) + +/*define for DW_10 word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_T2T_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_shift) + +/*define for dcc_dir field*/ +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift 31 +#define SDMA_PKT_COPY_T2T_HEADER_DCC_DIR(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_dcc_dir_mask) << SDMA_PKT_COPY_T2T_HEADER_dcc_dir_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift) + +/*define for src_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift) + +/*define for src_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift) + +/*define for src_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_max_shift) + +/*define for src_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_6_src_mip_id_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift) + +/*define for dst_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift) + +/*define for dst_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift) + +/*define for dst_mip_max field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift 16 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_max_shift) + +/*define for dst_mip_id field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift 20 +#define SDMA_PKT_COPY_T2T_DW_12_DST_MIP_ID(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_mip_id_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_offset 15 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_T2T_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_offset 16 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_T2T_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_T2T_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_T2T_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_T2T_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_T2T_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_T2T_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_surface_type_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_T2T_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_T2T_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_write_compress_enable_shift) + +/*define for meta_tmz field*/ +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_offset 17 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_T2T_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_T2T_META_CONFIG_meta_tmz_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_BC_HEADER_sub_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_element_size_shift) + +/*define for src_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_array_mode_shift) + +/*define for src_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mit_mode_shift) + +/*define for src_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_tilesplit_size_shift) + +/*define for src_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_w_shift) + +/*define for src_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_bank_h_shift) + +/*define for src_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_num_bank_shift) + +/*define for src_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_mat_aspt_shift) + +/*define for src_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_offset 6 +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_6_SRC_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_6_src_pipe_config_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_BC_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask 0x00000FFF +#define SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_BC_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_element_size_shift) + +/*define for dst_array_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift 3 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_array_mode_shift) + +/*define for dst_mit_mode field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift 8 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MIT_MODE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mit_mode_shift) + +/*define for dst_tilesplit_size field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_tilesplit_size_shift) + +/*define for dst_bank_w field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift 15 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_W(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_w_shift) + +/*define for dst_bank_h field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift 18 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_BANK_H(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_bank_h_shift) + +/*define for dst_num_bank field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift 21 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_NUM_BANK(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_num_bank_shift) + +/*define for dst_mat_aspt field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_MAT_ASPT(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_mat_aspt_shift) + +/*define for dst_pipe_config field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_offset 12 +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift 26 +#define SDMA_PKT_COPY_T2T_BC_DW_12_DST_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_mask) << SDMA_PKT_COPY_T2T_BC_DW_12_dst_pipe_config_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_BC_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_BC_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_BC_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_BC_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_BC_DW_14_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift) + +/*define for dcc field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift 19 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DCC(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_dcc_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_max_shift) + +/*define for mip_id field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift 20 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_mip_id_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x00001FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift) + +/*define for META_ADDR_LO word*/ +/*define for meta_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_offset 14 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_META_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_LO_meta_addr_31_0_shift) + +/*define for META_ADDR_HI word*/ +/*define for meta_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_offset 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_META_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_ADDR_HI_meta_addr_63_32_shift) + +/*define for META_CONFIG word*/ +/*define for data_format field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask 0x0000007F +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_DATA_FORMAT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_data_format_shift) + +/*define for color_transform_disable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_COLOR_TRANSFORM_DISABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_color_transform_disable_shift) + +/*define for alpha_is_on_msb field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_ALPHA_IS_ON_MSB(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_alpha_is_on_msb_shift) + +/*define for number_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_NUMBER_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_number_type_shift) + +/*define for surface_type field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_SURFACE_TYPE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_surface_type_shift) + +/*define for max_comp_block_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_COMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_comp_block_size_shift) + +/*define for max_uncomp_block_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_MAX_UNCOMP_BLOCK_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_max_uncomp_block_size_shift) + +/*define for write_compress_enable field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift 28 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_WRITE_COMPRESS_ENABLE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_write_compress_enable_shift) + +/*define for meta_tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_offset 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift 29 +#define SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_META_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_META_CONFIG_meta_tmz_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_sub_op_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_ARRAY_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MIT_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift 15 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_W(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_BANK_H(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift 21 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_NUM_BANK(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_MAT_ASPT(x) ((x & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift 26 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_PIPE_CONFIG(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_6_pipe_config_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_BC_DW_13_tile_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_STRUCT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift) + +/*define for SB_ADDR_LO word*/ +/*define for sb_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift) + +/*define for SB_ADDR_HI word*/ +/*define for sb_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift) + +/*define for START_INDEX word*/ +/*define for start_index field*/ +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4 +#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0 +#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift) + +/*define for DW_5 word*/ +/*define for stride field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16 +#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift) + +/*define for struct_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_UNTILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24 +#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4 +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_5_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_mip_max_mask) << SDMA_PKT_WRITE_TILED_DW_5_mip_max_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x00001FFF +#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED_BC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_BC_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_BC_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_BC_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_BC_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_element_size_shift) + +/*define for array_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_ARRAY_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_array_mode_shift) + +/*define for mit_mode field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift 8 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MIT_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mit_mode_shift) + +/*define for tilesplit_size field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift 11 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_TILESPLIT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_tilesplit_size_shift) + +/*define for bank_w field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift 15 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_W(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_w_shift) + +/*define for bank_h field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift 18 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_BANK_H(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_bank_h_shift) + +/*define for num_bank field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift 21 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_NUM_BANK(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_num_bank_shift) + +/*define for mat_aspt field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_MAT_ASPT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_mat_aspt_shift) + +/*define for pipe_config field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_offset 5 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift 26 +#define SDMA_PKT_WRITE_TILED_BC_DW_5_PIPE_CONFIG(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_mask) << SDMA_PKT_WRITE_TILED_BC_DW_5_pipe_config_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_BC_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_BC_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_BC_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_BC_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift 2 +#define SDMA_PKT_WRITE_TILED_BC_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_BC_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_BC_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_BC_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_BC_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_BC_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift 18 +#define SDMA_PKT_PTEPDE_COPY_HEADER_TMZ(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_tmz_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_tmz_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift) + +/*define for pte_size field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift) + +/*define for direction field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_BIT_FOR_DW word*/ +/*define for mask_first_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift) + +/*define for mask_last_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift) + +/*define for COUNT_IN_32B_XFER word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_RMW packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift 16 +#define SDMA_PKT_PTEPDE_RMW_HEADER_MTYPE(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_mtype_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_mtype_shift) + +/*define for gcc field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift 24 +#define SDMA_PKT_PTEPDE_RMW_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_l2_policy_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift) + +/*define for MASK_LO word*/ +/*define for mask_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift) + +/*define for MASK_HI word*/ +/*define for mask_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift) + +/*define for VALUE_LO word*/ +/*define for value_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift) + +/*define for VALUE_HI word*/ +/*define for value_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_INCR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift) + +/*define for INIT_DW0 word*/ +/*define for init_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift) + +/*define for INIT_DW1 word*/ +/*define for init_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift) + +/*define for INCR_DW0 word*/ +/*define for incr_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift) + +/*define for INCR_DW1 word*/ +/*define for incr_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9 +#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_INDIRECT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_INDIRECT_HEADER_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_op_shift 0 +#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8 +#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift) + +/*define for vmid field*/ +#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F +#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16 +#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift) + +/*define for priv field*/ +#define SDMA_PKT_INDIRECT_HEADER_priv_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_priv_mask 0x00000001 +#define SDMA_PKT_INDIRECT_HEADER_priv_shift 31 +#define SDMA_PKT_INDIRECT_HEADER_PRIV(x) (((x) & SDMA_PKT_INDIRECT_HEADER_priv_mask) << SDMA_PKT_INDIRECT_HEADER_priv_shift) + +/*define for BASE_LO word*/ +/*define for ib_base_31_0 field*/ +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1 +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0 +#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift) + +/*define for BASE_HI word*/ +/*define for ib_base_63_32 field*/ +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2 +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0 +#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift) + +/*define for IB_SIZE word*/ +/*define for ib_size field*/ +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3 +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0 +#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift) + +/*define for CSA_ADDR_LO word*/ +/*define for csa_addr_31_0 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift) + +/*define for CSA_ADDR_HI word*/ +/*define for csa_addr_63_32 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_SEMAPHORE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0 +#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift) + +/*define for write_one field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29 +#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift) + +/*define for signal field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30 +#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift) + +/*define for mailbox field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31 +#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_FENCE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_FENCE_HEADER_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_op_shift 0 +#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8 +#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift) + +/*define for mtype field*/ +#define SDMA_PKT_FENCE_HEADER_mtype_offset 0 +#define SDMA_PKT_FENCE_HEADER_mtype_mask 0x00000007 +#define SDMA_PKT_FENCE_HEADER_mtype_shift 16 +#define SDMA_PKT_FENCE_HEADER_MTYPE(x) (((x) & SDMA_PKT_FENCE_HEADER_mtype_mask) << SDMA_PKT_FENCE_HEADER_mtype_shift) + +/*define for gcc field*/ +#define SDMA_PKT_FENCE_HEADER_gcc_offset 0 +#define SDMA_PKT_FENCE_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gcc_shift 19 +#define SDMA_PKT_FENCE_HEADER_GCC(x) (((x) & SDMA_PKT_FENCE_HEADER_gcc_mask) << SDMA_PKT_FENCE_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_FENCE_HEADER_sys_offset 0 +#define SDMA_PKT_FENCE_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_sys_shift 20 +#define SDMA_PKT_FENCE_HEADER_SYS(x) (((x) & SDMA_PKT_FENCE_HEADER_sys_mask) << SDMA_PKT_FENCE_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_FENCE_HEADER_snp_offset 0 +#define SDMA_PKT_FENCE_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_snp_shift 22 +#define SDMA_PKT_FENCE_HEADER_SNP(x) (((x) & SDMA_PKT_FENCE_HEADER_snp_mask) << SDMA_PKT_FENCE_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_FENCE_HEADER_gpa_offset 0 +#define SDMA_PKT_FENCE_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_FENCE_HEADER_gpa_shift 23 +#define SDMA_PKT_FENCE_HEADER_GPA(x) (((x) & SDMA_PKT_FENCE_HEADER_gpa_mask) << SDMA_PKT_FENCE_HEADER_gpa_shift) + +/*define for l2_policy field*/ +#define SDMA_PKT_FENCE_HEADER_l2_policy_offset 0 +#define SDMA_PKT_FENCE_HEADER_l2_policy_mask 0x00000003 +#define SDMA_PKT_FENCE_HEADER_l2_policy_shift 24 +#define SDMA_PKT_FENCE_HEADER_L2_POLICY(x) (((x) & SDMA_PKT_FENCE_HEADER_l2_policy_mask) << SDMA_PKT_FENCE_HEADER_l2_policy_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_FENCE_DATA_data_offset 3 +#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_DATA_data_shift 0 +#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_SRBM_WRITE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift) + +/*define for byte_en field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28 +#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift) + +/*define for ADDR word*/ +/*define for addr field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0 +#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift) + +/*define for apertureid field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask 0x00000FFF +#define SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift 20 +#define SDMA_PKT_SRBM_WRITE_ADDR_APERTUREID(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_apertureid_mask) << SDMA_PKT_SRBM_WRITE_ADDR_apertureid_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2 +#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0 +#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_PRE_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0 +#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift) + +/*define for dev_sel field*/ +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16 +#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_COND_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COND_EXE_HEADER_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_op_shift 0 +#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift) + +/*define for REFERENCE word*/ +/*define for reference field*/ +#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3 +#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0 +#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_CONSTANT_FILL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift) + +/*define for sw field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift) + +/*define for fillsize field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30 +#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DATA word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3 +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4 +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0 +#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_DATA_FILL_MULTI packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift) + +/*define for memlog_clr field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift) + +/*define for BYTE_STRIDE word*/ +/*define for byte_stride field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift) + +/*define for DMA_COUNT word*/ +/*define for dma_count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for BYTE_COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REGMEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift) + +/*define for hdp_flush field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26 +#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift) + +/*define for func field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28 +#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift) + +/*define for mem_poll field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31 +#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift) + +/*define for VALUE word*/ +/*define for value field*/ +#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3 +#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0 +#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift) + +/*define for MASK word*/ +/*define for mask field*/ +#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4 +#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0 +#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift) + +/*define for DW5 word*/ +/*define for interval field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF +#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0 +#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift) + +/*define for retry_count field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16 +#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift) + +/*define for SRC_ADDR word*/ +/*define for addr_31_2 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift) + +/*define for ea field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + +/*define for START_PAGE word*/ +/*define for addr_31_4 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift) + +/*define for PAGE_NUM word*/ +/*define for page_num_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift) + + +/* +** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift) + +/*define for mode field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift) + +/*define for PATTERN word*/ +/*define for pattern field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift) + +/*define for CMP0_ADDR_START_LO word*/ +/*define for cmp0_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift) + +/*define for CMP0_ADDR_START_HI word*/ +/*define for cmp0_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift) + +/*define for CMP0_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP0_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for CMP1_ADDR_START_LO word*/ +/*define for cmp1_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift) + +/*define for CMP1_ADDR_START_HI word*/ +/*define for cmp1_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift) + +/*define for CMP1_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP1_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for REC_ADDR_LO word*/ +/*define for rec_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift) + +/*define for REC_ADDR_HI word*/ +/*define for rec_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift) + +/*define for RESERVED word*/ +/*define for reserved field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift) + + +/* +** Definitions for SDMA_PKT_ATOMIC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_ATOMIC_HEADER_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_ATOMIC_HEADER_op_shift 0 +#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift) + +/*define for loop field*/ +#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16 +#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift) + +/*define for tmz field*/ +#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18 +#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift) + +/*define for atomic_op field*/ +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25 +#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift) + +/*define for SRC_DATA_LO word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift) + +/*define for SRC_DATA_HI word*/ +/*define for src_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift) + +/*define for CMP_DATA_LO word*/ +/*define for cmp_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift) + +/*define for CMP_DATA_HI word*/ +/*define for cmp_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift) + +/*define for LOOP_INTERVAL word*/ +/*define for loop_interval field*/ +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_SET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift) + +/*define for INIT_DATA_LO word*/ +/*define for init_data_31_0 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift) + +/*define for INIT_DATA_HI word*/ +/*define for init_data_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_DUMMY_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_GPUVM_INV packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_op_shift 0 +#define SDMA_PKT_GPUVM_INV_HEADER_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_offset 0 +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift 8 +#define SDMA_PKT_GPUVM_INV_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GPUVM_INV_HEADER_sub_op_mask) << SDMA_PKT_GPUVM_INV_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for per_vmid_inv_req field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask 0x0000FFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_PER_VMID_INV_REQ(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_per_vmid_inv_req_shift) + +/*define for flush_type field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask 0x00000007 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift 16 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FLUSH_TYPE(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_flush_type_shift) + +/*define for l2_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift 19 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_ptes_shift) + +/*define for l2_pde0 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift 20 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE0(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde0_shift) + +/*define for l2_pde1 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift 21 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE1(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde1_shift) + +/*define for l2_pde2 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift 22 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L2_PDE2(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l2_pde2_shift) + +/*define for l1_ptes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift 23 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_L1_PTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_l1_ptes_shift) + +/*define for clr_protection_fault_status_addr field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift 24 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_CLR_PROTECTION_FAULT_STATUS_ADDR(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_clr_protection_fault_status_addr_shift) + +/*define for log_request field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift 25 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_LOG_REQUEST(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_log_request_shift) + +/*define for four_kilobytes field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_offset 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift 26 +#define SDMA_PKT_GPUVM_INV_PAYLOAD1_FOUR_KILOBYTES(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD1_four_kilobytes_shift) + +/*define for PAYLOAD2 word*/ +/*define for s field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask 0x00000001 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_S(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_s_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_s_shift) + +/*define for page_va_42_12 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_offset 2 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask 0x7FFFFFFF +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift 1 +#define SDMA_PKT_GPUVM_INV_PAYLOAD2_PAGE_VA_42_12(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD2_page_va_42_12_shift) + +/*define for PAYLOAD3 word*/ +/*define for page_va_47_43 field*/ +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_offset 3 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask 0x0000003F +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift 0 +#define SDMA_PKT_GPUVM_INV_PAYLOAD3_PAGE_VA_47_43(x) (((x) & SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_mask) << SDMA_PKT_GPUVM_INV_PAYLOAD3_page_va_47_43_shift) + + +/* +** Definitions for SDMA_PKT_GCR_REQ packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_op_shift 0 +#define SDMA_PKT_GCR_REQ_HEADER_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_op_mask) << SDMA_PKT_GCR_REQ_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_offset 0 +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_GCR_REQ_HEADER_sub_op_shift 8 +#define SDMA_PKT_GCR_REQ_HEADER_SUB_OP(x) (((x) & SDMA_PKT_GCR_REQ_HEADER_sub_op_mask) << SDMA_PKT_GCR_REQ_HEADER_sub_op_shift) + +/*define for PAYLOAD1 word*/ +/*define for base_va_31_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_offset 1 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask 0x01FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift 7 +#define SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD1_base_va_31_7_shift) + +/*define for PAYLOAD2 word*/ +/*define for base_va_47_32 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_offset 2 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_base_va_47_32_shift) + +/*define for gcr_control_15_0 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_offset 2 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift 16 +#define SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_mask) << SDMA_PKT_GCR_REQ_PAYLOAD2_gcr_control_15_0_shift) + +/*define for PAYLOAD3 word*/ +/*define for gcr_control_18_16 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask 0x00000007 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_gcr_control_18_16_shift) + +/*define for limit_va_31_7 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_offset 3 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask 0x01FFFFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift 7 +#define SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_mask) << SDMA_PKT_GCR_REQ_PAYLOAD3_limit_va_31_7_shift) + +/*define for PAYLOAD4 word*/ +/*define for limit_va_47_32 field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_offset 4 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask 0x0000FFFF +#define SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift 0 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_limit_va_47_32_shift) + +/*define for vmid field*/ +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_offset 4 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask 0x0000000F +#define SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift 24 +#define SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(x) (((x) & SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_mask) << SDMA_PKT_GCR_REQ_PAYLOAD4_vmid_shift) + + +/* +** Definitions for SDMA_PKT_NOP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_NOP_HEADER_op_offset 0 +#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_op_shift 0 +#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_NOP_HEADER_sub_op_offset 0 +#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_sub_op_shift 8 +#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift) + +/*define for count field*/ +#define SDMA_PKT_NOP_HEADER_count_offset 0 +#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF +#define SDMA_PKT_NOP_HEADER_count_shift 16 +#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_NOP_DATA0_data0_offset 1 +#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_NOP_DATA0_data0_shift 0 +#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift) + + +/* +** Definitions for SDMA_AQL_PKT_HEADER packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0 +#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16 +#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift) + + +/* +** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift) + +/*define for RETURN_ADDR_LO word*/ +/*define for return_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift) + +/*define for RETURN_ADDR_HI word*/ +/*define for return_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for RESERVED_DW10 word*/ +/*define for reserved_dw10 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift) + +/*define for RESERVED_DW11 word*/ +/*define for reserved_dw11 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +/* +** Definitions for SDMA_AQL_PKT_BARRIER_OR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift) + +/*define for DEPENDENT_ADDR_0_LO word*/ +/*define for dependent_addr_0_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift) + +/*define for DEPENDENT_ADDR_0_HI word*/ +/*define for dependent_addr_0_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift) + +/*define for DEPENDENT_ADDR_1_LO word*/ +/*define for dependent_addr_1_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift) + +/*define for DEPENDENT_ADDR_1_HI word*/ +/*define for dependent_addr_1_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift) + +/*define for DEPENDENT_ADDR_2_LO word*/ +/*define for dependent_addr_2_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift) + +/*define for DEPENDENT_ADDR_2_HI word*/ +/*define for dependent_addr_2_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift) + +/*define for DEPENDENT_ADDR_3_LO word*/ +/*define for dependent_addr_3_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift) + +/*define for DEPENDENT_ADDR_3_HI word*/ +/*define for dependent_addr_3_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift) + +/*define for DEPENDENT_ADDR_4_LO word*/ +/*define for dependent_addr_4_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift) + +/*define for DEPENDENT_ADDR_4_HI word*/ +/*define for dependent_addr_4_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +#endif /* __NAVI10_SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c new file mode 100644 index 000000000000..835d7b1a841f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -0,0 +1,334 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v2_3.h" + +#include "nbio/nbio_2_3_default.h" +#include "nbio/nbio_2_3_offset.h" +#include "nbio/nbio_2_3_sh_mask.h" + +#define smnPCIE_CONFIG_CNTL 0x11180044 +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + +static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbio_v2_3_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | + BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); +} + +static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + NBIO, 0, mmBIF_BX_PF_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); +} + +static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, SIZE, + doorbell_size); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_SDMA0_DOORBELL_RANGE, SIZE, + 0); + + WREG32(reg, doorbell_range); +} + +static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + +static void nbio_v2_3_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, + enable ? 1 : 0); +} + +static void nbio_v2_3_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF_DOORBELL_SELFRING_GPA_APER_CNTL, + tmp); +} + + +static void nbio_v2_3_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, + 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + BIF_IH_DOORBELL_RANGE, SIZE, + 0); + + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); +} + +static void nbio_v2_3_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); + /* + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); +} + +static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnCPM_CONTROL); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_PCIE(smnCPM_CONTROL, data); +} + +static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE(smnPCIE_CNTL2, data); +} + +static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static u32 nbio_v2_3_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v2_3_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v2_3_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); +} + +static u32 nbio_v2_3_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); +} + +const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v2_3_detect_hw_virt(struct amdgpu_device *adev) +{ + uint32_t reg; + + reg = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_IOV_FUNC_IDENTIFIER); + if (reg & 1) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + + if (reg & 0x80000000) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + + if (!reg) { + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; + } +} + +static void nbio_v2_3_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); +} + +const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { + .hdp_flush_reg = &nbio_v2_3_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v2_3_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v2_3_get_pcie_data_offset, + .get_rev_id = nbio_v2_3_get_rev_id, + .mc_access_enable = nbio_v2_3_mc_access_enable, + .hdp_flush = nbio_v2_3_hdp_flush, + .get_memsize = nbio_v2_3_get_memsize, + .sdma_doorbell_range = nbio_v2_3_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v2_3_vcn_doorbell_range, + .enable_doorbell_aperture = nbio_v2_3_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v2_3_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v2_3_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v2_3_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v2_3_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v2_3_get_clockgating_state, + .ih_control = nbio_v2_3_ih_control, + .init_registers = nbio_v2_3_init_registers, + .detect_hw_virt = nbio_v2_3_detect_hw_virt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h new file mode 100644 index 000000000000..5ae52085f6b7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V2_3_H__ +#define __NBIO_V2_3_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c new file mode 100644 index 000000000000..662612f89c70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -0,0 +1,821 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "amdgpu_ih.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" +#include "amdgpu_ucode.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "atom.h" +#include "amd_pcie.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "hdp/hdp_5_0_0_sh_mask.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "gmc_v10_0.h" +#include "gfxhub_v2_0.h" +#include "mmhub_v2_0.h" +#include "nv.h" +#include "navi10_ih.h" +#include "gfx_v10_0.h" +#include "sdma_v5_0.h" +#include "vcn_v2_0.h" +#include "dce_virtual.h" +#include "mes_v10_1.h" + +static const struct amd_ip_funcs nv_common_ip_funcs; + +/* + * Indirect registers accessor + */ +static u32 nv_pcie_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void nv_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + r = RREG32(data); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + return r; +} + +static void nv_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + WREG32(data, (v)); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); +} + +static u32 nv_get_config_memsize(struct amdgpu_device *adev) +{ + return adev->nbio_funcs->get_memsize(adev); +} + +static u32 nv_get_xclk(struct amdgpu_device *adev) +{ + return adev->clock.spll.reference_freq; +} + + +void nv_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); +} + +static void nv_vga_set_state(struct amdgpu_device *adev, bool state) +{ + /* todo */ +} + +static bool nv_read_disabled_bios(struct amdgpu_device *adev) +{ + /* todo */ + return false; +} + +static bool nv_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes) +{ + /* TODO: will implement it when SMU header is available */ + return false; +} + +static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)}, +#if 0 /* TODO: will set it when SDMA header is available */ + { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)}, +#endif + { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, +}; + +static uint32_t nv_read_indexed_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t nv_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return nv_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) + return adev->gfx.config.gb_addr_config; + return RREG32(reg_offset); + } +} + +static int nv_read_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset, u32 *value) +{ + uint32_t i; + struct soc15_allowed_register_entry *en; + + *value = 0; + for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) { + en = &nv_allowed_read_registers[i]; + if (reg_offset != + (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) + continue; + + *value = nv_get_register_value(adev, + nv_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +#if 0 +static void nv_gpu_pci_config_reset(struct amdgpu_device *adev) +{ + u32 i; + + dev_info(adev->dev, "GPU pci config reset\n"); + + /* disable BM */ + pci_clear_master(adev->pdev); + /* reset */ + amdgpu_pci_config_reset(adev); + + udelay(100); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = nbio_v2_3_get_memsize(adev); + if (memsize != 0xffffffff) + break; + udelay(1); + } + +} +#endif + +static int nv_asic_mode1_reset(struct amdgpu_device *adev) +{ + u32 i; + int ret = 0; + + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + dev_info(adev->dev, "GPU mode1 reset\n"); + + /* disable BM */ + pci_clear_master(adev->pdev); + + pci_save_state(adev->pdev); + + ret = psp_gpu_reset(adev); + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); + + pci_restore_state(adev->pdev); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = adev->nbio_funcs->get_memsize(adev); + + if (memsize != 0xffffffff) + break; + udelay(1); + } + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + + return ret; +} +static int nv_asic_reset(struct amdgpu_device *adev) +{ + + /* FIXME: it doesn't work since vega10 */ +#if 0 + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + nv_gpu_pci_config_reset(adev); + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); +#endif + int ret = 0; + struct smu_context *smu = &adev->smu; + + if (smu_baco_is_support(smu)) + ret = smu_baco_reset(smu); + else + ret = nv_asic_mode1_reset(adev); + + return ret; +} + +static int nv_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + /* todo */ + return 0; +} + +static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + /* todo */ + return 0; +} + +static void nv_pcie_gen3_enable(struct amdgpu_device *adev) +{ + if (pci_is_root_bus(adev->pdev->bus)) + return; + + if (amdgpu_pcie_gen2 == 0) + return; + + if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) + return; + + /* todo */ +} + +static void nv_program_aspm(struct amdgpu_device *adev) +{ + + if (amdgpu_aspm == 0) + return; + + /* todo */ +} + +static void nv_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + adev->nbio_funcs->enable_doorbell_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); +} + +static const struct amdgpu_ip_block_version nv_common_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &nv_common_ip_funcs, +}; + +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ + /* Set IP register base before any HW register access */ + switch (adev->asic_type) { + case CHIP_NAVI10: + navi10_reg_base_init(adev); + break; + default: + return -EINVAL; + } + + adev->nbio_funcs = &nbio_v2_3_funcs; + + adev->nbio_funcs->detect_hw_virt(adev); + + switch (adev->asic_type) { + case CHIP_NAVI10: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (adev->enable_mes) + amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); + break; + default: + return -EINVAL; + } + + return 0; +} + +static uint32_t nv_get_rev_id(struct amdgpu_device *adev) +{ + return adev->nbio_funcs->get_rev_id(adev); +} + +static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + adev->nbio_funcs->hdp_flush(adev, ring); +} + +static void nv_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + } else { + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); + } +} + +static bool nv_need_full_reset(struct amdgpu_device *adev) +{ + return true; +} + +static void nv_get_pcie_usage(struct amdgpu_device *adev, + uint64_t *count0, + uint64_t *count1) +{ + /*TODO*/ +} + +static bool nv_need_reset_on_init(struct amdgpu_device *adev) +{ +#if 0 + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return true; +#endif + /* TODO: re-enable it when mode1 reset is functional */ + return false; +} + +static void nv_init_doorbell_index(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0; + adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; + adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7; + adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 20; +} + +static const struct amdgpu_asic_funcs nv_asic_funcs = +{ + .read_disabled_bios = &nv_read_disabled_bios, + .read_bios_from_rom = &nv_read_bios_from_rom, + .read_register = &nv_read_register, + .reset = &nv_asic_reset, + .set_vga_state = &nv_vga_set_state, + .get_xclk = &nv_get_xclk, + .set_uvd_clocks = &nv_set_uvd_clocks, + .set_vce_clocks = &nv_set_vce_clocks, + .get_config_memsize = &nv_get_config_memsize, + .flush_hdp = &nv_flush_hdp, + .invalidate_hdp = &nv_invalidate_hdp, + .init_doorbell_index = &nv_init_doorbell_index, + .need_full_reset = &nv_need_full_reset, + .get_pcie_usage = &nv_get_pcie_usage, + .need_reset_on_init = &nv_need_reset_on_init, +}; + +static int nv_common_early_init(void *handle) +{ + bool psp_enabled = false; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &nv_pcie_rreg; + adev->pcie_wreg = &nv_pcie_wreg; + + /* TODO: will add them during VCN v2 implementation */ + adev->uvd_ctx_rreg = NULL; + adev->uvd_ctx_wreg = NULL; + + adev->didt_rreg = &nv_didt_rreg; + adev->didt_wreg = &nv_didt_wreg; + + adev->asic_funcs = &nv_asic_funcs; + + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) + psp_enabled = true; + + adev->rev_id = nv_get_rev_id(adev); + adev->external_rev_id = 0xff; + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_MMHUB | + AMD_PG_SUPPORT_ATHUB; + adev->external_rev_id = adev->rev_id + 0x1; + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + return 0; +} + +static int nv_common_late_init(void *handle) +{ + return 0; +} + +static int nv_common_sw_init(void *handle) +{ + return 0; +} + +static int nv_common_sw_fini(void *handle) +{ + return 0; +} + +static int nv_common_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* enable pcie gen2/3 link */ + nv_pcie_gen3_enable(adev); + /* enable aspm */ + nv_program_aspm(adev); + /* setup nbio registers */ + adev->nbio_funcs->init_registers(adev); + /* enable the doorbell aperture */ + nv_enable_doorbell_aperture(adev, true); + + return 0; +} + +static int nv_common_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* disable the doorbell aperture */ + nv_enable_doorbell_aperture(adev, false); + + return 0; +} + +static int nv_common_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return nv_common_hw_fini(adev); +} + +static int nv_common_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return nv_common_hw_init(adev); +} + +static bool nv_common_is_idle(void *handle) +{ + return true; +} + +static int nv_common_wait_for_idle(void *handle) +{ + return 0; +} + +static int nv_common_soft_reset(void *handle) +{ + return 0; +} + +static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); + + /* HDP 5.0 doesn't support dynamic power mode switch, + * disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, enable); + /* RC should not use shut down mode, fallback to ds */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } + + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* restore IPH & RC clock override after clock/power mode changing */ + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); +} + +static void nv_update_hdp_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); +} + +static int nv_common_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + adev->nbio_funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + adev->nbio_funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + nv_update_hdp_mem_power_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + nv_update_hdp_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static int nv_common_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* TODO */ + return 0; +} + +static void nv_common_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t tmp; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + adev->nbio_funcs->get_clockgating_state(adev, flags); + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; + + return; +} + +static const struct amd_ip_funcs nv_common_ip_funcs = { + .name = "nv_common", + .early_init = nv_common_early_init, + .late_init = nv_common_late_init, + .sw_init = nv_common_sw_init, + .sw_fini = nv_common_sw_fini, + .hw_init = nv_common_hw_init, + .hw_fini = nv_common_hw_fini, + .suspend = nv_common_suspend, + .resume = nv_common_resume, + .is_idle = nv_common_is_idle, + .wait_for_idle = nv_common_wait_for_idle, + .soft_reset = nv_common_soft_reset, + .set_clockgating_state = nv_common_set_clockgating_state, + .set_powergating_state = nv_common_set_powergating_state, + .get_clockgating_state = nv_common_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h new file mode 100644 index 000000000000..639c54933cc5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -0,0 +1,33 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NV_H__ +#define __NV_H__ + +#include "nbio_v2_3.h" + +void nv_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); +int nv_set_ip_blocks(struct amdgpu_device *adev); +int navi10_reg_base_init(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h new file mode 100644 index 000000000000..1de984647dbb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -0,0 +1,418 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef NVD_H +#define NVD_H + +/** + * Navi's PM4 definitions + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + ((reg) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_INDIRECT_BUFFER_END 0x17 +#define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_PRIV 0x32 +#define PACKET3_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_COND_INDIRECT_BUFFER_CNST 0x33 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_PREAMBLE 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define WRITE_DATA_DST_SEL(x) ((x) << 8) + /* 0 - register + * 1 - memory (sync - via GRBM) + * 2 - gl2 + * 3 - gds + * 4 - reserved + * 5 - memory (async - direct) + */ +#define WR_ONE_ADDR (1 << 16) +#define WR_CONFIRM (1 << 20) +#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) + /* 0 - me + * 1 - pfp + * 2 - ce + */ +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +# define PACKET3_SEM_USE_MAILBOX (0x1 << 16) +# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ +# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) +# define PACKET3_SEM_SEL_WAIT (0x7 << 29) +#define PACKET3_DRAW_INDEX_MULTI_INST 0x3A +#define PACKET3_COPY_DW 0x3B +#define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_OPERATION(x) ((x) << 6) + /* 0 - wait_reg_mem + * 1 - wr_wait_wr_reg + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ +#define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_VALID (1 << 23) +#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) + /* 0 - LRU + * 1 - Stream + * 2 - Bypass + */ +#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_COND_INDIRECT_BUFFER 0x3F +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_CP_DMA 0x41 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_SURFACE_SYNC 0x43 +#define PACKET3_ME_INITIALIZE 0x44 +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + */ +#define PACKET3_EVENT_WRITE_EOP 0x47 +#define PACKET3_EVENT_WRITE_EOS 0x48 +#define PACKET3_RELEASE_MEM 0x49 +#define PACKET3_RELEASE_MEM_EVENT_TYPE(x) ((x) << 0) +#define PACKET3_RELEASE_MEM_EVENT_INDEX(x) ((x) << 8) +#define PACKET3_RELEASE_MEM_GCR_GLM_WB (1 << 12) +#define PACKET3_RELEASE_MEM_GCR_GLM_INV (1 << 13) +#define PACKET3_RELEASE_MEM_GCR_GLV_INV (1 << 14) +#define PACKET3_RELEASE_MEM_GCR_GL1_INV (1 << 15) +#define PACKET3_RELEASE_MEM_GCR_GL2_US (1 << 16) +#define PACKET3_RELEASE_MEM_GCR_GL2_RANGE (1 << 17) +#define PACKET3_RELEASE_MEM_GCR_GL2_DISCARD (1 << 19) +#define PACKET3_RELEASE_MEM_GCR_GL2_INV (1 << 20) +#define PACKET3_RELEASE_MEM_GCR_GL2_WB (1 << 21) +#define PACKET3_RELEASE_MEM_GCR_SEQ (1 << 22) +#define PACKET3_RELEASE_MEM_CACHE_POLICY(x) ((x) << 25) + /* 0 - cache_policy__me_release_mem__lru + * 1 - cache_policy__me_release_mem__stream + * 2 - cache_policy__me_release_mem__noa + * 3 - cache_policy__me_release_mem__bypass + */ +#define PACKET3_RELEASE_MEM_EXECUTE (1 << 28) + +#define PACKET3_RELEASE_MEM_DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value + */ +#define PACKET3_RELEASE_MEM_INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define PACKET3_RELEASE_MEM_DST_SEL(x) ((x) << 16) + /* 0 - MC + * 1 - TC/L2 + */ + + + +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_DMA_DATA 0x50 +/* 1. header + * 2. CONTROL + * 3. SRC_ADDR_LO or DATA [31:0] + * 4. SRC_ADDR_HI [31:0] + * 5. DST_ADDR_LO [31:0] + * 6. DST_ADDR_HI [7:0] + * 7. COMMAND [31:26] | BYTE_COUNT [25:0] + */ +/* CONTROL */ +# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) + /* 0 - DST_ADDR using DAS + * 1 - GDS + * 3 - DST_ADDR using L2 + */ +# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR using SAS + * 1 - GDS + * 2 - DATA + * 3 - SRC_ADDR using L2 + */ +# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) +# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) +# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +#define PACKET3_CONTEXT_REG_RMW 0x51 +#define PACKET3_GFX_CNTX_UPDATE 0x52 +#define PACKET3_BLK_CNTX_UPDATE 0x53 +#define PACKET3_INCR_UPDT_STATE 0x55 +#define PACKET3_ACQUIRE_MEM 0x58 +#define PACKET3_REWIND 0x59 +#define PACKET3_INTERRUPT 0x5A +#define PACKET3_GEN_PDEPTE 0x5B +#define PACKET3_INDIRECT_BUFFER_PASID 0x5C +#define PACKET3_PRIME_UTCL2 0x5D +#define PACKET3_LOAD_UCONFIG_REG 0x5E +#define PACKET3_LOAD_SH_REG 0x5F +#define PACKET3_LOAD_CONFIG_REG 0x60 +#define PACKET3_LOAD_CONTEXT_REG 0x61 +#define PACKET3_LOAD_COMPUTE_STATE 0x62 +#define PACKET3_LOAD_SH_REG_INDEX 0x63 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00002000 +#define PACKET3_SET_CONFIG_REG_END 0x00002c00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x0000a000 +#define PACKET3_SET_CONTEXT_REG_END 0x0000a400 +#define PACKET3_SET_CONTEXT_REG_INDEX 0x6A +#define PACKET3_SET_VGPR_REG_DI_MULTI 0x71 +#define PACKET3_SET_SH_REG_DI 0x72 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_SH_REG_DI_MULTI 0x74 +#define PACKET3_GFX_PIPE_LOCK 0x75 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 +#define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_QUEUE_REG 0x78 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 +#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG_INDEX 0x7A +#define PACKET3_FORWARD_HEADER 0x7C +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 +#define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_DISPATCH_DRAW_PREAMBLE 0x8C +#define PACKET3_DISPATCH_DRAW_PREAMBLE_ACE 0x8C +#define PACKET3_DISPATCH_DRAW 0x8D +#define PACKET3_DISPATCH_DRAW_ACE 0x8D +#define PACKET3_GET_LOD_STATS 0x8E +#define PACKET3_DRAW_MULTI_PREAMBLE 0x8F +#define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_CMD(x) ((x) << 28) + /* + * x=0: tmz_begin + * x=1: tmz_end + */ +#define PACKET3_INDEX_ATTRIBUTES_INDIRECT 0x91 +#define PACKET3_WAIT_REG_MEM64 0x93 +#define PACKET3_COND_PREEMPT 0x94 +#define PACKET3_HDP_FLUSH 0x95 +#define PACKET3_COPY_DATA_RB 0x96 +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +#define PACKET3_AQL_PACKET 0x99 +#define PACKET3_DMA_DATA_FILL_MULTI 0x9A +#define PACKET3_SET_SH_REG_INDEX 0x9B +#define PACKET3_DRAW_INDIRECT_COUNT_MULTI 0x9C +#define PACKET3_DRAW_INDEX_INDIRECT_COUNT_MULTI 0x9D +#define PACKET3_DUMP_CONST_RAM_OFFSET 0x9E +#define PACKET3_LOAD_CONTEXT_REG_INDEX 0x9F +#define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) +#define PACKET3_MAP_PROCESS 0xA1 +#define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) +#define PACKET3_RUN_LIST 0xA5 +#define PACKET3_MAP_PROCESS_VM 0xA6 + + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 7f8edc66ddff..5080a73a95a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -80,6 +80,18 @@ struct psp_gfx_ctrl */ #define GFX_FLAG_RESPONSE 0x80000000 +/* Gbr IH registers ID */ +enum ih_reg_id { + IH_RB = 0, // IH_RB_CNTL + IH_RB_RNG1 = 1, // IH_RB_CNTL_RING1 + IH_RB_RNG2 = 2, // IH_RB_CNTL_RING2 +}; + +/* Command to setup Gibraltar IH register */ +struct psp_gfx_cmd_gbr_ih_reg { + uint32_t reg_value; /* Value to be set to the IH_RB_CNTL... register*/ + enum ih_reg_id reg_id; /* ID of the register */ +}; /* TEE Gfx Command IDs for the ring buffer interface. */ enum psp_gfx_cmd_id @@ -95,9 +107,11 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ + /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ + GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ + GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ }; - /* Command to load Trusted Application binary into PSP OS. */ struct psp_gfx_cmd_load_ta { @@ -169,33 +183,59 @@ struct psp_gfx_cmd_setup_tmr /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */ -enum psp_gfx_fw_type -{ - GFX_FW_TYPE_NONE = 0, - GFX_FW_TYPE_CP_ME = 1, - GFX_FW_TYPE_CP_PFP = 2, - GFX_FW_TYPE_CP_CE = 3, - GFX_FW_TYPE_CP_MEC = 4, - GFX_FW_TYPE_CP_MEC_ME1 = 5, - GFX_FW_TYPE_CP_MEC_ME2 = 6, - GFX_FW_TYPE_RLC_V = 7, - GFX_FW_TYPE_RLC_G = 8, - GFX_FW_TYPE_SDMA0 = 9, - GFX_FW_TYPE_SDMA1 = 10, - GFX_FW_TYPE_DMCU_ERAM = 11, - GFX_FW_TYPE_DMCU_ISR = 12, - GFX_FW_TYPE_VCN = 13, - GFX_FW_TYPE_UVD = 14, - GFX_FW_TYPE_VCE = 15, - GFX_FW_TYPE_ISP = 16, - GFX_FW_TYPE_ACP = 17, - GFX_FW_TYPE_SMU = 18, - GFX_FW_TYPE_MMSCH = 19, - GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, - GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, - GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, - GFX_FW_TYPE_UVD1 = 23, - GFX_FW_TYPE_MAX = 24 +enum psp_gfx_fw_type { + GFX_FW_TYPE_NONE = 0, /* */ + GFX_FW_TYPE_CP_ME = 1, /* CP-ME VG + RV */ + GFX_FW_TYPE_CP_PFP = 2, /* CP-PFP VG + RV */ + GFX_FW_TYPE_CP_CE = 3, /* CP-CE VG + RV */ + GFX_FW_TYPE_CP_MEC = 4, /* CP-MEC FW VG + RV */ + GFX_FW_TYPE_CP_MEC_ME1 = 5, /* CP-MEC Jump Table 1 VG + RV */ + GFX_FW_TYPE_CP_MEC_ME2 = 6, /* CP-MEC Jump Table 2 VG */ + GFX_FW_TYPE_RLC_V = 7, /* RLC-V VG */ + GFX_FW_TYPE_RLC_G = 8, /* RLC-G VG + RV */ + GFX_FW_TYPE_SDMA0 = 9, /* SDMA0 VG + RV */ + GFX_FW_TYPE_SDMA1 = 10, /* SDMA1 VG */ + GFX_FW_TYPE_DMCU_ERAM = 11, /* DMCU-ERAM VG + RV */ + GFX_FW_TYPE_DMCU_ISR = 12, /* DMCU-ISR VG + RV */ + GFX_FW_TYPE_VCN = 13, /* VCN RV */ + GFX_FW_TYPE_UVD = 14, /* UVD VG */ + GFX_FW_TYPE_VCE = 15, /* VCE VG */ + GFX_FW_TYPE_ISP = 16, /* ISP RV */ + GFX_FW_TYPE_ACP = 17, /* ACP RV */ + GFX_FW_TYPE_SMU = 18, /* SMU VG */ + GFX_FW_TYPE_MMSCH = 19, /* MMSCH VG */ + GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, /* RLC GPM VG + RV */ + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, /* RLC SRM VG + RV */ + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL = 22, /* RLC CNTL VG + RV */ + GFX_FW_TYPE_UVD1 = 23, /* UVD1 VG-20 */ + GFX_FW_TYPE_TOC = 24, /* TOC NV-10 */ + GFX_FW_TYPE_RLC_P = 25, /* RLC P NV */ + GFX_FW_TYPE_RLX6 = 26, /* RLX6 NV */ + GFX_FW_TYPE_GLOBAL_TAP_DELAYS = 27, /* GLOBAL TAP DELAYS NV */ + GFX_FW_TYPE_SE0_TAP_DELAYS = 28, /* SE0 TAP DELAYS NV */ + GFX_FW_TYPE_SE1_TAP_DELAYS = 29, /* SE1 TAP DELAYS NV */ + GFX_FW_TYPE_GLOBAL_SE0_SE1_SKEW_DELAYS = 30, /* GLOBAL SE0/1 SKEW DELAYS NV */ + GFX_FW_TYPE_SDMA0_JT = 31, /* SDMA0 JT NV */ + GFX_FW_TYPE_SDMA1_JT = 32, /* SDNA1 JT NV */ + GFX_FW_TYPE_CP_MES = 33, /* CP MES NV */ + GFX_FW_TYPE_MES_STACK = 34, /* MES STACK NV */ + GFX_FW_TYPE_RLC_SRM_DRAM_SR = 35, /* RLC SRM DRAM NV */ + GFX_FW_TYPE_RLCG_SCRATCH_SR = 36, /* RLCG SCRATCH NV */ + GFX_FW_TYPE_RLCP_SCRATCH_SR = 37, /* RLCP SCRATCH NV */ + GFX_FW_TYPE_RLCV_SCRATCH_SR = 38, /* RLCV SCRATCH NV */ + GFX_FW_TYPE_RLX6_DRAM_SR = 39, /* RLX6 DRAM NV */ + GFX_FW_TYPE_SDMA0_PG_CONTEXT = 40, /* SDMA0 PG CONTEXT NV */ + GFX_FW_TYPE_SDMA1_PG_CONTEXT = 41, /* SDMA1 PG CONTEXT NV */ + GFX_FW_TYPE_GLOBAL_MUX_SELECT_RAM = 42, /* GLOBAL MUX SEL RAM NV */ + GFX_FW_TYPE_SE0_MUX_SELECT_RAM = 43, /* SE0 MUX SEL RAM NV */ + GFX_FW_TYPE_SE1_MUX_SELECT_RAM = 44, /* SE1 MUX SEL RAM NV */ + GFX_FW_TYPE_ACCUM_CTRL_RAM = 45, /* ACCUM CTRL RAM NV */ + GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ + GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ + GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ + GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */ + GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */ + GFX_FW_TYPE_MAX }; /* Command to load HW IP FW. */ @@ -224,6 +264,14 @@ struct psp_gfx_cmd_reg_prog { uint32_t reg_id; }; +/* Command to load TOC */ +struct psp_gfx_cmd_load_toc +{ + uint32_t toc_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ + uint32_t toc_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ + uint32_t toc_size; /* FW buffer size in bytes */ +}; + /* All GFX ring buffer commands. */ union psp_gfx_commands { @@ -234,21 +282,23 @@ union psp_gfx_commands struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; struct psp_gfx_cmd_reg_prog cmd_setup_reg_prog; + struct psp_gfx_cmd_setup_tmr cmd_setup_vmr; + struct psp_gfx_cmd_load_toc cmd_load_toc; }; - /* Structure of GFX Response buffer. * For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI * it is separate buffer. */ struct psp_gfx_resp { - uint32_t status; /* +0 status of command execution */ - uint32_t session_id; /* +4 session ID in response to LoadTa command */ - uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ - uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t status; /* +0 status of command execution */ + uint32_t session_id; /* +4 session ID in response to LoadTa command */ + uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t tmr_size; /* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */ - uint32_t reserved[4]; + uint32_t reserved[3]; /* total 32 bytes */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index b1e7aca72578..41b72588adcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -41,9 +41,16 @@ MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); +MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 +/* navi10 reg offset define */ +#define mmRLC_GPM_UCODE_ADDR_NV10 0x5b61 +#define mmRLC_GPM_UCODE_DATA_NV10 0x5b62 +#define mmSDMA0_UCODE_ADDR_NV10 0x5880 +#define mmSDMA0_UCODE_DATA_NV10 0x5881 static int psp_v11_0_init_microcode(struct psp_context *psp) { @@ -52,6 +59,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) char fw_name[30]; int err = 0; const struct psp_firmware_header_v1_0 *sos_hdr; + const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_0 *asd_hdr; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -61,6 +69,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_VEGA20: chip_name = "vega20"; break; + case CHIP_NAVI10: + chip_name = "navi10"; + break; default: BUG(); } @@ -75,15 +86,34 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) goto out; sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; - adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); - adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); - adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) - - le32_to_cpu(sos_hdr->sos_size_bytes); - adev->psp.sys_start_addr = (uint8_t *)sos_hdr + + amdgpu_ucode_print_psp_hdr(&sos_hdr->header); + + switch (sos_hdr->header.header_version_major) { + case 1: + adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->sos_offset_bytes); + adev->psp.sys_start_addr = (uint8_t *)sos_hdr + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); - adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + + adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr->sos_offset_bytes); + if (sos_hdr->header.header_version_minor == 1) { + sos_hdr_v1_1 = (const struct psp_firmware_header_v1_1 *)adev->psp.sos_fw->data; + adev->psp.toc_bin_size = le32_to_cpu(sos_hdr_v1_1->toc_size_bytes); + adev->psp.toc_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_1->toc_offset_bytes); + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_1->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); + } + break; + default: + dev_err(adev->dev, + "Unsupported psp sos firmware\n"); + err = -EINVAL; + goto out; + } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); @@ -101,30 +131,36 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) adev->psp.asd_start_addr = (uint8_t *)asd_hdr + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); - err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) { - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; - dev_info(adev->dev, - "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); - } else { - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); - - adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); - - adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); - adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); - adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + - le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + switch (adev->asic_type) { + case CHIP_VEGA20: + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + adev->psp.ta_fw_version = le32_to_cpu(ta_hdr->header.ucode_version); + adev->psp.ta_ras_ucode_version = le32_to_cpu(ta_hdr->ta_ras_ucode_version); + adev->psp.ta_ras_ucode_size = le32_to_cpu(ta_hdr->ta_ras_size_bytes); + adev->psp.ta_ras_start_addr = (uint8_t *)adev->psp.ta_xgmi_start_addr + + le32_to_cpu(ta_hdr->ta_ras_offset_bytes); + } + break; + case CHIP_NAVI10: + break; + default: + BUG(); } return 0; @@ -144,6 +180,48 @@ out: return err; } +static int psp_v11_0_bootloader_load_kdb(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); + return 0; + } + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, psp->kdb_start_addr, psp->kdb_bin_size); + + /* Provide the sys driver to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_KEY_DATABASE; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1*/ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + return ret; +} + static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) { int ret; @@ -157,7 +235,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); if (sol_reg) { psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); - printk("sos fw version = 0x%x.\n", psp->sos_fw_version); + dev_info(adev->dev, "sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; } @@ -175,7 +253,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 1 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -216,7 +294,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 2 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -501,14 +579,24 @@ psp_v11_0_sram_map(struct amdgpu_device *adev, case AMDGPU_UCODE_ID_RLC_G: *sram_offset = 0x2000; - *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); - *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + if (adev->asic_type < CHIP_NAVI10) { + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + } else { + *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_ADDR_NV10; + *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmRLC_GPM_UCODE_DATA_NV10; + } break; case AMDGPU_UCODE_ID_SDMA0: *sram_offset = 0x0; - *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); - *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + if (adev->asic_type < CHIP_NAVI10) { + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + } else { + *sram_addr_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_ADDR_NV10; + *sram_data_reg_offset = adev->reg_offset[GC_HWIP][0][1] + mmSDMA0_UCODE_DATA_NV10; + } break; /* TODO: needs to confirm */ @@ -772,8 +860,14 @@ static int psp_v11_0_ras_cure_posion(struct psp_context *psp, uint64_t *mode_ptr #endif } +static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) +{ + return psp_rlc_autoload_start(psp); +} + static const struct psp_funcs psp_v11_0_funcs = { .init_microcode = psp_v11_0_init_microcode, + .bootloader_load_kdb = psp_v11_0_bootloader_load_kdb, .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, .bootloader_load_sos = psp_v11_0_bootloader_load_sos, .ring_init = psp_v11_0_ring_init, @@ -790,6 +884,7 @@ static const struct psp_funcs psp_v11_0_funcs = { .support_vmr_ring = psp_v11_0_support_vmr_ring, .ras_trigger_error = psp_v11_0_ras_trigger_error, .ras_cure_posion = psp_v11_0_ras_cure_posion, + .rlc_autoload_start = psp_v11_0_rlc_autoload_start, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 2ea772692037..019c47feee42 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -155,7 +155,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 1 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SYSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); @@ -218,7 +218,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); - psp_gfxdrv_command_reg = 2 << 16; + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, psp_gfxdrv_command_reg); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index bc3087599523..4428018672d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1095,7 +1095,7 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) static int sdma_v4_0_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int i, r; + int i, r = 0; if (amdgpu_sriov_vf(adev)) { sdma_v4_0_ctx_switch_enable(adev, false); @@ -1569,9 +1569,7 @@ static int sdma_v4_0_late_init(void *handle) if (r) goto interrupt; - r = amdgpu_ras_debugfs_create(adev, &fs_info); - if (r) - goto debugfs; + amdgpu_ras_debugfs_create(adev, &fs_info); r = amdgpu_ras_sysfs_create(adev, &fs_info); if (r) @@ -1592,7 +1590,6 @@ irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); -debugfs: amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c new file mode 100644 index 000000000000..3747c3f1f0cc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -0,0 +1,1687 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "gc/gc_10_1_0_offset.h" +#include "gc/gc_10_1_0_sh_mask.h" +#include "hdp/hdp_5_0_0_offset.h" +#include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h" +#include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "navi10_sdma_pkt_open.h" +#include "nbio_v2_3.h" +#include "sdma_v5_0.h" + +MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); + +#define SDMA1_REG_OFFSET 0x600 +#define SDMA0_HYP_DEC_REG_START 0x5880 +#define SDMA0_HYP_DEC_REG_END 0x5893 +#define SDMA1_HYP_DEC_REG_OFFSET 0x20 + +static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev); + +static const struct soc15_reg_golden golden_settings_sdma_5[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_UTCL1_PAGE, 0x00ffffff, 0x000c5c00), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_CHICKEN_BITS, 0xffbf1f0f, 0x03ab0107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_UTCL1_PAGE, 0x00ffffff, 0x000c5c00) +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { +}; + +static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) +{ + u32 base; + + if (internal_offset >= SDMA0_HYP_DEC_REG_START && + internal_offset <= SDMA0_HYP_DEC_REG_END) { + base = adev->reg_offset[GC_HWIP][0][1]; + if (instance == 1) + internal_offset += SDMA1_HYP_DEC_REG_OFFSET; + } else { + base = adev->reg_offset[GC_HWIP][0][0]; + if (instance == 1) + internal_offset += SDMA1_REG_OFFSET; + } + + return base + internal_offset; +} + +static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_NAVI10: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv10, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); + break; + default: + break; + } +} + +/** + * sdma_v5_0_init_microcode - load ucode images from disk + * + * @adev: amdgpu_device pointer + * + * Use the firmware interface to load the ucode images into + * the driver (not loaded into hw). + * Returns 0 on success, error on failure. + */ + +// emulation only, won't work on real chip +// navi10 real chip need to use PSP to load firmware +static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err = 0, i; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_NAVI10: + chip_name = "navi10"; + break; + default: + BUG(); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (i == 0) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); + if (err) + goto out; + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma.instance[i].feature_version >= 20) + adev->sdma.instance[i].burst_nop = true; + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } +out: + if (err) { + DRM_ERROR("sdma_v5_0: Failed to load firmware \"%s\"\n", fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } + } + return err; +} + +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + + return ret; +} + +static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned offset) +{ + unsigned cur; + + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur > offset) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +/** + * sdma_v5_0_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (NAVI10+). + */ +static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64 *rptr; + + /* XXX check if swapping is necessary on BE */ + rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v5_0_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (NAVI10+). + */ +static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 *wptr = NULL; + uint64_t local_wptr = 0; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); + *wptr = (*wptr) >> 2; + DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + } else { + u32 lowbit, highbit; + + wptr = &local_wptr; + lowbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + + DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", + ring->me, highbit, lowbit); + *wptr = highbit; + *wptr = (*wptr) << 32; + *wptr |= lowbit; + } + + return *wptr; +} + +/** + * sdma_v5_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (NAVI10+). + */ +static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + DRM_DEBUG("Setting write pointer\n"); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } +} + +static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->funcs->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +/** + * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine + * + * @ring: amdgpu ring pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (NAVI10). + */ +static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + + /* IB packet must end on a 8 DW boundary */ + sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +/** + * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring + * + * @ring: amdgpu ring pointer + * + * Emit an hdp flush packet on the requested DMA ring. + */ +static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 ref_and_mask = 0; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; + + if (ring->me == 0) + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; + else + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ +} + +/** + * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @fence: amdgpu fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (NAVI10). + */ +static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */ + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE) | + SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + /* Interrupt not work fine on GFX10.1 model yet. Use fallback instead */ + if ((flags & AMDGPU_FENCE_FLAG_INT) && adev->pdev->device != 0x50) { + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); + } +} + + +/** + * sdma_v5_0_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the gfx async dma ring buffers (NAVI10). + */ +static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; +} + +/** + * sdma_v5_0_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the compute async dma queues (NAVI10). + */ +static void sdma_v5_0_rlc_stop(struct amdgpu_device *adev) +{ + /* XXX todo */ +} + +/** + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (NAVI10). + */ +static void sdma_v5_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl, phase_quantum = 0; + int i; + + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + } + +} + +/** + * sdma_v5_0_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines (NAVI10). + */ +static void sdma_v5_0_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + if (enable == false) { + sdma_v5_0_gfx_stop(adev); + sdma_v5_0_rlc_stop(adev); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + } +} + +/** + * sdma_v5_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u32 temp; + u32 wptr_poll_cntl; + u64 wptr_gpu_addr; + int i, r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + wb_offset = (ring->rptr_offs * 4); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, + mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), + wptr_poll_cntl); + + /* set the wb address whether it's enabled or not */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } + + doorbell = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, 20); + + if (amdgpu_sriov_vf(adev)) + sdma_v5_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + + /* enable MCBP */ + temp = REG_SET_FIELD(temp, SDMA0_CNTL, MIDCMD_PREEMPT_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + ring->sched.ready = true; + + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v5_0_ctx_switch_enable(adev, true); + sdma_v5_0_enable(adev, true); + } + + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + return r; + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + return 0; +} + +/** + * sdma_v5_0_rlc_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the compute DMA queues and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_rlc_resume(struct amdgpu_device *adev) +{ + return 0; +} + +/** + * sdma_v5_0_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v5_0_load_microcode(struct amdgpu_device *adev) +{ + const struct sdma_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + int i, j; + + /* halt the MEs */ + sdma_v5_0_enable(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (!adev->sdma.instance[i].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + fw_data = (const __le32 *) + (adev->sdma.instance[i].fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + + for (j = 0; j < fw_size; j++) { + if (amdgpu_emu_mode == 1 && j % 500 == 0) + msleep(1); + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + } + + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + } + + return 0; +} + +/** + * sdma_v5_0_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the DMA engines and enable them (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_start(struct amdgpu_device *adev) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v5_0_ctx_switch_enable(adev, false); + sdma_v5_0_enable(adev, false); + + /* set RB registers */ + r = sdma_v5_0_gfx_resume(adev); + return r; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + r = sdma_v5_0_load_microcode(adev); + if (r) + return r; + + /* The value of mmSDMA_F32_CNTL is invalid the moment after loading fw */ + if (amdgpu_emu_mode == 1 && adev->pdev->device == 0x4d) + msleep(1000); + } + + /* unhalt the MEs */ + sdma_v5_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v5_0_ctx_switch_enable(adev, true); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v5_0_gfx_resume(adev); + if (r) + return r; + r = sdma_v5_0_rlc_resume(adev); + + return r; +} + +/** + * sdma_v5_0_ring_test_ring - simple async dma engine test + * + * @ring: amdgpu_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (NAVI10). + * Returns 0 for success, error for failure. + */ +static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + unsigned i; + unsigned index; + int r; + u32 tmp; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + amdgpu_device_wb_free(adev, index); + return r; + } + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) + break; + if (amdgpu_emu_mode == 1) + msleep(1); + else + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + if (amdgpu_emu_mode == 1) + DRM_INFO("ring test on %d succeeded in %d msecs\n", ring->idx, i); + else + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + amdgpu_device_wb_free(adev, index); + + return r; +} + +/** + * sdma_v5_0_ring_test_ib - test an IB on the DMA engine + * + * @ring: amdgpu_ring structure holding ring information + * + * Test a simple IB in the DMA ring (NAVI10). + * Returns 0 on success, error on failure. + */ +static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + amdgpu_device_wb_free(adev, index); + return r; +} + + +/** + * sdma_v5_0_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA (NAVI10). + */ +static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v5_0_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update PTEs by writing them manually using sDMA (NAVI10). + */ +static void sdma_v5_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v5_0_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA (NAVI10). + */ +static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw + * + * @ib: indirect buffer to fill with padding + * + */ +static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); +} + + +/** + * sdma_v5_0_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). + */ +static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xfffffff); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ +} + + +/** + * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vm: amdgpu_vm pointer + * + * Update the page table base and flush the VM TLB + * using sDMA (NAVI10). + */ +static void sdma_v5_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static void sdma_v5_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + +static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); +} + +static int sdma_v5_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->sdma.num_instances = 2; + + sdma_v5_0_set_ring_funcs(adev); + sdma_v5_0_set_buffer_funcs(adev); + sdma_v5_0_set_vm_pte_funcs(adev); + sdma_v5_0_set_irq_funcs(adev); + + return 0; +} + + +static int sdma_v5_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, + SDMA0_5_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, + SDMA1_5_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + + r = sdma_v5_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + DRM_INFO("use_doorbell being set to: [%s]\n", + ring->use_doorbell?"true":"false"); + + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset + : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset + + sprintf(ring->name, "sdma%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? + AMDGPU_SDMA_IRQ_INSTANCE0 : + AMDGPU_SDMA_IRQ_INSTANCE1); + if (r) + return r; + } + + return r; +} + +static int sdma_v5_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + return 0; +} + +static int sdma_v5_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + sdma_v5_0_init_golden_registers(adev); + + r = sdma_v5_0_start(adev); + + return r; +} + +static int sdma_v5_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v5_0_ctx_switch_enable(adev, false); + sdma_v5_0_enable(adev, false); + + return 0; +} + +static int sdma_v5_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_0_hw_fini(adev); +} + +static int sdma_v5_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v5_0_hw_init(adev); +} + +static bool sdma_v5_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v5_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 sdma0, sdma1; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + sdma0 = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + + if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v5_0_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ + int i, r = 0; + struct amdgpu_device *adev = ring->adev; + u32 index = 0; + u64 sdma_gfx_preempt; + + amdgpu_sdma_get_index_from_ring(ring, &index); + if (index == 0) + sdma_gfx_preempt = mmSDMA0_GFX_PREEMPT; + else + sdma_gfx_preempt = mmSDMA1_GFX_PREEMPT; + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 10); + sdma_v5_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, + ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(sdma_gfx_preempt, 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + DRM_UDELAY(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + DRM_ERROR("ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(sdma_gfx_preempt, 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + return r; +} + +static int sdma_v5_0_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? + sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : + sdma_v5_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); + + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + + return 0; +} + +static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: SDMA trap\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[0].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case SOC15_IH_CLIENTID_SDMA1: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[1].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + } + return 0; +} + +static int sdma_v5_0_process_illegal_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + return 0; +} + +static void sdma_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { + /* Enable sdma clock gating */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } else { + /* Disable sdma clock gating */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL), data); + } + } +} + +static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { + /* Enable sdma mem light sleep */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } else { + /* Disable sdma mem light sleep */ + def = data = RREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL), data); + + } + } +} + +static int sdma_v5_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_NAVI10: + sdma_v5_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + sdma_v5_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +static int sdma_v5_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_SDMA_MGCG */ + data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL)); + if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK)) + *flags |= AMD_CG_SUPPORT_SDMA_MGCG; + + /* AMD_CG_SUPPORT_SDMA_LS */ + data = RREG32(sdma_v5_0_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); + if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) + *flags |= AMD_CG_SUPPORT_SDMA_LS; +} + +const struct amd_ip_funcs sdma_v5_0_ip_funcs = { + .name = "sdma_v5_0", + .early_init = sdma_v5_0_early_init, + .late_init = NULL, + .sw_init = sdma_v5_0_sw_init, + .sw_fini = sdma_v5_0_sw_fini, + .hw_init = sdma_v5_0_hw_init, + .hw_fini = sdma_v5_0_hw_fini, + .suspend = sdma_v5_0_suspend, + .resume = sdma_v5_0_resume, + .is_idle = sdma_v5_0_is_idle, + .wait_for_idle = sdma_v5_0_wait_for_idle, + .soft_reset = sdma_v5_0_soft_reset, + .set_clockgating_state = sdma_v5_0_set_clockgating_state, + .set_powergating_state = sdma_v5_0_set_powergating_state, + .get_clockgating_state = sdma_v5_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_GFXHUB, + .get_rptr = sdma_v5_0_ring_get_rptr, + .get_wptr = sdma_v5_0_ring_get_wptr, + .set_wptr = sdma_v5_0_ring_set_wptr, + .emit_frame_size = + 5 + /* sdma_v5_0_ring_init_cond_exec */ + 6 + /* sdma_v5_0_ring_emit_hdp_flush */ + 3 + /* hdp_invalidate */ + 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ + /* sdma_v5_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ + .emit_ib = sdma_v5_0_ring_emit_ib, + .emit_fence = sdma_v5_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush, + .test_ring = sdma_v5_0_ring_test_ring, + .test_ib = sdma_v5_0_ring_test_ib, + .insert_nop = sdma_v5_0_ring_insert_nop, + .pad_ib = sdma_v5_0_ring_pad_ib, + .emit_wreg = sdma_v5_0_ring_emit_wreg, + .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .init_cond_exec = sdma_v5_0_ring_init_cond_exec, + .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, + .preempt_ib = sdma_v5_0_ring_preempt_ib, +}; + +static void sdma_v5_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + adev->sdma.instance[i].ring.funcs = &sdma_v5_0_ring_funcs; + adev->sdma.instance[i].ring.me = i; + } +} + +static const struct amdgpu_irq_src_funcs sdma_v5_0_trap_irq_funcs = { + .set = sdma_v5_0_set_trap_irq_state, + .process = sdma_v5_0_process_trap_irq, +}; + +static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = { + .process = sdma_v5_0_process_illegal_inst_irq, +}; + +static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs; +} + +/** + * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Copy GPU buffers using the DMA engine (NAVI10). + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); +} + +/** + * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine (NAVI10). + */ +static void sdma_v5_0_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v5_0_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v5_0_emit_copy_buffer, + + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v5_0_emit_fill_buffer, +}; + +static void sdma_v5_0_set_buffer_funcs(struct amdgpu_device *adev) +{ + if (adev->mman.buffer_funcs == NULL) { + adev->mman.buffer_funcs = &sdma_v5_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + } +} + +static const struct amdgpu_vm_pte_funcs sdma_v5_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, + .copy_pte = sdma_v5_0_vm_copy_pte, + .write_pte = sdma_v5_0_vm_write_pte, + .set_pte_pde = sdma_v5_0_vm_set_pte_pde, +}; + +static void sdma_v5_0_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + struct drm_gpu_scheduler *sched; + unsigned i; + + if (adev->vm_manager.vm_pte_funcs == NULL) { + adev->vm_manager.vm_pte_funcs = &sdma_v5_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + } +} + +const struct amdgpu_ip_block_version sdma_v5_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &sdma_v5_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h new file mode 100644 index 000000000000..d5a94e3d181c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.h @@ -0,0 +1,45 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_V5_0_H__ +#define __SDMA_V5_0_H__ + +enum sdma_v5_0_utcl2_cache_read_policy { + CACHE_READ_POLICY_L2__LRU = 0x00000000, + CACHE_READ_POLICY_L2__STREAM = 0x00000001, + CACHE_READ_POLICY_L2__NOA = 0x00000002, + CACHE_READ_POLICY_L2__DEFAULT = CACHE_READ_POLICY_L2__NOA, +}; + +enum sdma_v5_0_utcl2_cache_write_policy { + CACHE_WRITE_POLICY_L2__LRU = 0x00000000, + CACHE_WRITE_POLICY_L2__STREAM = 0x00000001, + CACHE_WRITE_POLICY_L2__NOA = 0x00000002, + CACHE_WRITE_POLICY_L2__BYPASS = 0x00000003, + CACHE_WRITE_POLICY_L2__DEFAULT = CACHE_WRITE_POLICY_L2__BYPASS, +}; + +extern const struct amd_ip_funcs sdma_v5_0_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v5_0_ip_block; + +#endif /* __SDMA_V5_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 5e1a2528df7f..4d74453f3cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1340,8 +1340,8 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* This reports 0 on APUs, so return to avoid writing/reading registers * that may or may not be different from their GPU counterparts */ - if (adev->flags & AMD_IS_APU) - return; + if (adev->flags & AMD_IS_APU) + return; /* Set the 2 events that we wish to watch, defined above */ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b769995c3029..23265414d448 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -275,15 +275,6 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -struct soc15_allowed_register_entry { - uint32_t hwip; - uint32_t inst; - uint32_t seg; - uint32_t reg_offset; - bool grbm_indexed; -}; - - static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, @@ -388,7 +379,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, } else { tmp = RREG32(reg); tmp &= ~(entry->and_mask); - tmp |= entry->or_mask; + tmp |= (entry->or_mask & entry->and_mask); } if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) || @@ -658,8 +649,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) { amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); @@ -680,8 +669,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); -#else -# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; @@ -722,13 +709,19 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, /* This reports 0 on APUs, so return to avoid writing/reading registers * that may or may not be different from their GPU counterparts */ - if (adev->flags & AMD_IS_APU) - return; + if (adev->flags & AMD_IS_APU) + return; /* Set the 2 events that we wish to watch, defined above */ - /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */ + /* Reg 40 is # received msgs */ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); + /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ + if (adev->asic_type == CHIP_VEGA20) + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, + EVENT1_SEL, 108); + else + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, + EVENT1_SEL, 104); /* Write to enable desired perf counters */ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); @@ -1035,6 +1028,8 @@ static int soc15_common_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_add_irq_id(adev); + adev->df_funcs->sw_init(adev); + return 0; } @@ -1081,6 +1076,7 @@ static int soc15_common_hw_init(void *handle) */ if (adev->nbio_funcs->remap_hdp_registers) adev->nbio_funcs->remap_hdp_registers(adev); + /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); /* HW doorbell routing policy: doorbell writing not diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 06f39f5bbf76..7a6b2cc6d9f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -48,6 +48,16 @@ struct soc15_reg_entry { uint32_t seg; uint32_t reg_offset; uint32_t reg_value; + uint32_t se_num; + uint32_t instance; +}; + +struct soc15_allowed_register_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + bool grbm_indexed; }; #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 0b4e7b55595a..ca7d05993ca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -1,29 +1,26 @@ -/****************************************************************************\ -* -* File Name ta_ras_if.h -* Project AMD PSP SW IP Module -* -* Description Interface to the RAS Trusted Application -* -* Copyright 2019 Advanced Micro Devices, Inc. -* -* Permission is hereby granted, free of charge, to any person obtaining a copy of this software -* and associated documentation files (the "Software"), to deal in the Software without restriction, -* including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, -* subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in all copies or substantial -* portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -*/ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #ifndef _TA_RAS_IF_H #define _TA_RAS_IF_H @@ -31,8 +28,8 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) -#define TA_NUM_BLOCK_MAX 14 - +/* RAS related enumerations */ +/**********************************************************/ enum ras_command { TA_RAS_COMMAND__ENABLE_FEATURES = 0, TA_RAS_COMMAND__DISABLE_FEATURES, @@ -45,7 +42,12 @@ enum ta_ras_status { TA_RAS_STATUS__ERROR_INVALID_PARAMETER = 0x02, TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE = 0x03, TA_RAS_STATUS__ERROR_RAS_DUPLICATE_CMD = 0x04, - TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05 + TA_RAS_STATUS__ERROR_INJECTION_FAILED = 0x05, + TA_RAS_STATUS__ERROR_ASD_READ_WRITE = 0x06, + TA_RAS_STATUS__ERROR_TOGGLE_DF_CSTATE = 0x07, + TA_RAS_STATUS__ERROR_TIMEOUT = 0x08, + TA_RAS_STATUS__ERROR_BLOCK_DISABLED = 0x09, + TA_RAS_STATUS__ERROR_GENERIC = 0x10, }; enum ta_ras_block { @@ -62,47 +64,55 @@ enum ta_ras_block { TA_RAS_BLOCK__SEM, TA_RAS_BLOCK__MP0, TA_RAS_BLOCK__MP1, - TA_RAS_BLOCK__FUSE = (TA_NUM_BLOCK_MAX - 1), + TA_RAS_BLOCK__FUSE, + TA_NUM_BLOCK_MAX }; enum ta_ras_error_type { - TA_RAS_ERROR__NONE = 0, - TA_RAS_ERROR__PARITY = 1, - TA_RAS_ERROR__SINGLE_CORRECTABLE = 2, - TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4, - TA_RAS_ERROR__POISON = 8 + TA_RAS_ERROR__NONE = 0, + TA_RAS_ERROR__PARITY = 1, + TA_RAS_ERROR__SINGLE_CORRECTABLE = 2, + TA_RAS_ERROR__MULTI_UNCORRECTABLE = 4, + TA_RAS_ERROR__POISON = 8, }; +/* Input/output structures for RAS commands */ +/**********************************************************/ + struct ta_ras_enable_features_input { - enum ta_ras_block block_id; - enum ta_ras_error_type error_type; + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; }; struct ta_ras_disable_features_input { - enum ta_ras_block block_id; - enum ta_ras_error_type error_type; + enum ta_ras_block block_id; + enum ta_ras_error_type error_type; }; struct ta_ras_trigger_error_input { - enum ta_ras_block block_id; - enum ta_ras_error_type inject_error_type; - uint32_t sub_block_index; - uint64_t address; - uint64_t value; + enum ta_ras_block block_id; // ras-block. i.e. umc, gfx + enum ta_ras_error_type inject_error_type; // type of error. i.e. single_correctable + uint32_t sub_block_index; // mem block. i.e. hbm, sram etc. + uint64_t address; // explicit address of error + uint64_t value; // method if error injection. i.e persistent, coherent etc. }; +/* Common input structure for RAS callbacks */ +/**********************************************************/ union ta_ras_cmd_input { struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; }; +/* Shared Memory structures */ +/**********************************************************/ struct ta_ras_shared_memory { - uint32_t cmd_id; - uint32_t resp_id; - enum ta_ras_status ras_status; - uint32_t reserved; - union ta_ras_cmd_input ras_in_message; + uint32_t cmd_id; + uint32_t resp_id; + enum ta_ras_status ras_status; + uint32_t reserved; + union ta_ras_cmd_input ras_in_message; }; #endif // TL_RAS_IF_H_ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index d30ff256ff57..dde22b7d140d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -128,6 +128,17 @@ static int vcn_v1_0_sw_init(void *handle) if (r) return r; + adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = adev->vcn.external.data0 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = adev->vcn.external.data1 = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = adev->vcn.external.cmd = + SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = adev->vcn.external.nop = + SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); @@ -143,6 +154,8 @@ static int vcn_v1_0_sw_init(void *handle) return r; adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = + SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c new file mode 100644 index 000000000000..988c0adaca91 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -0,0 +1,2261 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_pm.h" +#include "amdgpu_psp.h" + +#include "vcn/vcn_2_0_0_offset.h" +#include "vcn/vcn_2_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x504 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x505 +#define mmUVD_NO_OP_INTERNAL_OFFSET 0x53f +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x54a +#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x1e1 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 + +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 +#define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a +#define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb +#define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf +#define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 +#define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed +#define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 +#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 +#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 + +#define mmUVD_RBC_XX_IB_REG_CHECK 0x026b +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x026c +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + +static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); + +/** + * vcn_v2_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->vcn.num_enc_rings = 2; + + vcn_v2_0_set_dec_ring_funcs(adev); + vcn_v2_0_set_enc_ring_funcs(adev); + vcn_v2_0_set_jpeg_ring_funcs(adev); + vcn_v2_0_set_irq_funcs(adev); + + return 0; +} + +/** + * vcn_v2_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* VCN DEC TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, + &adev->vcn.irq); + if (r) + return r; + + /* VCN ENC TRAP */ + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, + &adev->vcn.irq); + if (r) + return r; + } + + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, + &adev->vcn.irq); + if (r) + return r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + ring = &adev->vcn.ring_dec; + + ring->use_doorbell = true; + ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + + sprintf(ring->name, "vcn_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; + sprintf(ring->name, "vcn_enc%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + } + + ring = &adev->vcn.ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + sprintf(ring->name, "vcn_jpeg"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + + adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + + return 0; +} + +/** + * vcn_v2_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + int i, r; + + adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ring->doorbell_index); + + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.ring_enc[i]; + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + } + + ring = &adev->vcn.ring_jpeg; + ring->sched.ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->sched.ready = false; + goto done; + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v2_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v2_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + int i; + + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, 0, mmUVD_STATUS))) + vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + ring->sched.ready = false; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.ring_enc[i]; + ring->sched.ready = false; + } + + ring = &adev->vcn.ring_jpeg; + ring->sched.ready = false; + + return 0; +} + +/** + * vcn_v2_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v2_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v2_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v2_0_hw_init(adev); + + return r; +} + +/** + * vcn_v2_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr)); + offset = size; + /* No signed header for now from firmware + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + */ + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); + } + + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); +} + +static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v2_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__SCPU_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, + uint8_t sram_sel, uint8_t indirect) +{ + uint32_t reg_data = 0; + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + +/** + * jpeg_v2_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; + uint32_t tmp; + int r = 0; + + /* disable power gating */ + tmp = 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); + + SOC15_WAIT_ON_RREG(VCN, 0, + mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS_UVDJ_PWR_ON, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG disable power gating failed\n"); + return r; + } + + /* Removing the anti hang mechanism to indicate the UVDJ tile is ON */ + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)) & ~0x1; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JPEG_ENC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, 0, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v2_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_0_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int r = 0; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable JPEG CGC */ + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, tmp); + + + tmp = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, tmp); + + /* enable power gating */ + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS)); + tmp &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + tmp |= 0x1; //UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_TILES_OFF; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_POWER_STATUS), tmp); + + tmp = 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_PGFSM_CONFIG), tmp); + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, + (2 << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT), + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK, r); + + if (r) { + DRM_ERROR("amdgpu: JPEG enable power gating failed\n"); + return r; + } + + return r; +} + +/** + * vcn_v2_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); +} + +static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON_2_0, 0xFFFFF, ret); + } else { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFF, ret); + } + + /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS, + * UVDU_PWR_STATUS are 0 (power on) */ + + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); +} + +static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); + + + data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + + data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFF, ret); + } +} + +static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + uint32_t rb_bufsz, tmp; + + vcn_v2_0_enable_static_power_gating(adev); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.dpg_sram_curr_addr = (uint32_t*)adev->vcn.dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interupt */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), 0, 0, indirect); + + /* setup mmUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v2_0_mc_resume_dpg_mode(adev, indirect); + + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect); + + /* release VCPU reset to boot */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_SOFT_RESET), 0, 0, indirect); + + /* enable LMI MC and UMC channels */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_LMI_CTRL2), + 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( + UVD, 0, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + psp_update_vcn_sram(adev, 0, adev->vcn.dpg_sram_gpu_addr, + (uint32_t)((uintptr_t)adev->vcn.dpg_sram_curr_addr - + (uintptr_t)adev->vcn.dpg_sram_cpu_addr)); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + return 0; +} + +static int vcn_v2_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + int i, j, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); + if (r) + return r; + goto jpeg; + } + + vcn_v2_0_disable_static_power_gating(adev); + + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); + + /*SW clock gating */ + vcn_v2_0_disable_clock_gating(adev); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL); + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, 0, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v2_0_mc_resume(adev); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, 0, mmUVD_SOFT_RESET, tmp); + + /* disable byte swapping */ + lmi_swap_cntl = 0; +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; +#endif + WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + + for (i = 0; i < 10; ++i) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_VMID, 0); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + +jpeg: + r = jpeg_v2_0_start(adev); + + return r; +} + +static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v2_0_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int r; + + r = jpeg_v2_0_stop(adev); + if (r) + return r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_0_stop_dpg_mode(adev); + if (r) + return r; + goto power_off; + } + + /* wait for uvd idle */ + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* stall UMC channel */ + tmp = RREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, 0, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* reset LMI UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + + /* reset LMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* clear status */ + WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + + vcn_v2_0_enable_clock_gating(adev); + vcn_v2_0_enable_static_power_gating(adev); + +power_off: + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + struct amdgpu_ring *ring; + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d -> %d", + adev->vcn.pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +static bool vcn_v2_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); +} + +static int vcn_v2_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + + return ret; +} + +static int vcn_v2_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + /* wait for STATUS to clear */ + if (vcn_v2_0_is_idle(handle)) + return -EBUSY; + vcn_v2_0_enable_clock_gating(adev); + } else { + /* disable HW gating and enable Sw gating */ + vcn_v2_0_disable_clock_gating(adev); + } + return 0; +} + +/** + * vcn_v2_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * vcn_v2_0_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1); +} + +/** + * vcn_v2_0_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); +} + +/** + * vcn_v2_0_dec_ring_insert_nop - insert a nop command + * + * @ring: amdgpu_ring pointer + * + * Write a nop command to the ring. + */ +static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, 0); + } +} + +/** + * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + + amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1); +} + +/** + * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, vmid); + + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, reg << 2); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, mask); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + + amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1); +} + +static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, reg << 2); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + + amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); +} + +/** + * vcn_v2_0_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); +} + + /** + * vcn_v2_0_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + } +} + + /** + * vcn_v2_0_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.ring_enc[0]) { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +/** + * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write enc a fence and a trap command to the ring. + */ +static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); +} + +static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_END); +} + +/** + * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write enc ring commands to execute the indirect buffer + */ +static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, VCN_ENC_CMD_IB); + amdgpu_ring_write(ring, vmid); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for reg writes */ + vcn_v2_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2, + lower_32_bits(pd_addr), 0xffffffff); +} + +static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +/** + * vcn_v2_0_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_0_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_0_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * vcn_v2_0_jpeg_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * vcn_v2_0_jpeg_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * vcn_v2_0_jpeg_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * vcn_v2_0_jpeg_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. + */ +static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static int vcn_v2_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: + amdgpu_fence_process(&adev->vcn.ring_dec); + break; + case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.ring_enc[0]); + break; + case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: + amdgpu_fence_process(&adev->vcn.ring_enc[1]); + break; + case VCN_2_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->vcn.ring_jpeg); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static int vcn_v2_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the VCN block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v2_0_stop(adev); + else + ret = vcn_v2_0_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + return ret; +} + +static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { + .name = "vcn_v2_0", + .early_init = vcn_v2_0_early_init, + .late_init = NULL, + .sw_init = vcn_v2_0_sw_init, + .sw_fini = vcn_v2_0_sw_fini, + .hw_init = vcn_v2_0_hw_init, + .hw_fini = vcn_v2_0_hw_fini, + .suspend = vcn_v2_0_suspend, + .resume = vcn_v2_0_resume, + .is_idle = vcn_v2_0_is_idle, + .wait_for_idle = vcn_v2_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v2_0_set_clockgating_state, + .set_powergating_state = vcn_v2_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB, + .get_rptr = vcn_v2_0_dec_ring_get_rptr, + .get_wptr = vcn_v2_0_dec_ring_get_wptr, + .set_wptr = vcn_v2_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB, + .get_rptr = vcn_v2_0_enc_ring_get_rptr, + .get_wptr = vcn_v2_0_enc_ring_get_wptr, + .set_wptr = vcn_v2_0_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB, + .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, + .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, + .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ + 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v2_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v2_0_jpeg_ring_nop, + .insert_start = vcn_v2_0_jpeg_ring_insert_start, + .insert_end = vcn_v2_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; + DRM_INFO("VCN decode is enabled in VM mode\n"); +} + +static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; + + DRM_INFO("VCN encode is enabled in VM mode\n"); +} + +static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; + DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { + .set = vcn_v2_0_set_interrupt_state, + .process = vcn_v2_0_process_interrupt, +}; + +static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; +} + +const struct amdgpu_ip_block_version vcn_v2_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vcn_v2_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h new file mode 100644 index 000000000000..a74227f4663b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_0_H__ +#define __VCN_V2_0_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; + +#endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index d40ed1a828dd..6575ddcfcf00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -59,7 +59,6 @@ #include "vid.h" #include "vi.h" -#include "vi_dpm.h" #include "gmc_v8_0.h" #include "gmc_v7_0.h" #include "gfx_v8_0.h" |