Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
131 files changed, 10334 insertions, 2014 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index f6e5c0282fc1..2e98c016cb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -27,7 +27,9 @@ config DRM_AMDGPU_CIK  config DRM_AMDGPU_USERPTR  	bool "Always enable userptr write support"  	depends on DRM_AMDGPU -	depends on HMM_MIRROR +	depends on MMU +	select HMM_MIRROR +	select MMU_NOTIFIER  	help  	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it  	  isn't already selected to enabled full userptr support. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 56e084367b93..42e2c1f57152 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -54,7 +54,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \  	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \  	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \  	amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ -	amdgpu_vm_sdma.o amdgpu_discovery.o +	amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o  amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -66,7 +66,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce  amdgpu-y += \  	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ -	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o +	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ +	arct_reg_init.o navi12_reg_init.o  # add DF block  amdgpu-y += \ @@ -77,9 +78,13 @@ amdgpu-y += \  amdgpu-y += \  	gmc_v7_0.o \  	gmc_v8_0.o \ -	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \ +	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \  	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o +# add UMC block +amdgpu-y += \ +	umc_v6_1.o +  # add IH block  amdgpu-y += \  	amdgpu_irq.o \ @@ -95,7 +100,8 @@ amdgpu-y += \  	amdgpu_psp.o \  	psp_v3_1.o \  	psp_v10_0.o \ -	psp_v11_0.o +	psp_v11_0.o \ +	psp_v12_0.o  # add SMC block  amdgpu-y += \ @@ -144,10 +150,12 @@ amdgpu-y += \  amdgpu-y += \  	amdgpu_vcn.o \  	vcn_v1_0.o \ -	vcn_v2_0.o +	vcn_v2_0.o \ +	vcn_v2_5.o  # add ATHUB block  amdgpu-y += \ +	athub_v1_0.o \  	athub_v2_0.o  # add amdkfd interfaces @@ -162,6 +170,7 @@ amdgpu-y += \  	 amdgpu_amdkfd_gpuvm.o \  	 amdgpu_amdkfd_gfx_v8.o \  	 amdgpu_amdkfd_gfx_v9.o \ +	 amdgpu_amdkfd_arcturus.o \  	 amdgpu_amdkfd_gfx_v10.o  ifneq ($(CONFIG_DRM_AMDGPU_CIK),) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8199d201b43a..bd37df5dd6d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -86,6 +86,8 @@  #include "amdgpu_smu.h"  #include "amdgpu_discovery.h"  #include "amdgpu_mes.h" +#include "amdgpu_umc.h" +#include "amdgpu_mmhub.h"  #define MAX_GPU_INSTANCE		16 @@ -532,6 +534,14 @@ struct amdgpu_allowed_register_entry {  	bool grbm_indexed;  }; +enum amd_reset_method { +	AMD_RESET_METHOD_LEGACY = 0, +	AMD_RESET_METHOD_MODE0, +	AMD_RESET_METHOD_MODE1, +	AMD_RESET_METHOD_MODE2, +	AMD_RESET_METHOD_BACO +}; +  /*   * ASIC specific functions.   
*/ @@ -543,6 +553,7 @@ struct amdgpu_asic_funcs {  			     u32 sh_num, u32 reg_offset, u32 *value);  	void (*set_vga_state)(struct amdgpu_device *adev, bool state);  	int (*reset)(struct amdgpu_device *adev); +	enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);  	/* get the reference clock */  	u32 (*get_xclk)(struct amdgpu_device *adev);  	/* MM block clocks */ @@ -627,6 +638,9 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);  typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);  typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); +typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); +typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); +  typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);  typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); @@ -648,6 +662,12 @@ struct nbio_hdp_flush_reg {  	u32 ref_and_mask_cp9;  	u32 ref_and_mask_sdma0;  	u32 ref_and_mask_sdma1; +	u32 ref_and_mask_sdma2; +	u32 ref_and_mask_sdma3; +	u32 ref_and_mask_sdma4; +	u32 ref_and_mask_sdma5; +	u32 ref_and_mask_sdma6; +	u32 ref_and_mask_sdma7;  };  struct amdgpu_mmio_remap { @@ -668,7 +688,7 @@ struct amdgpu_nbio_funcs {  	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,  			bool use_doorbell, int doorbell_index, int doorbell_size);  	void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, -			int doorbell_index); +				   int doorbell_index, int instance);  	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,  					 bool enable);  	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, @@ -705,6 +725,9 @@ struct amdgpu_df_funcs {  					 int is_disable);  	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,  					 uint64_t *count); +	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); +	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, +			 uint32_t ficadl_val, uint32_t ficadh_val);  };  /* Define the HW IP blocks will be used in driver , add more if necessary */  enum amd_hw_ip_block_type { @@ -712,6 +735,12 @@ enum amd_hw_ip_block_type {  	HDP_HWIP,  	SDMA0_HWIP,  	SDMA1_HWIP, +	SDMA2_HWIP, +	SDMA3_HWIP, +	SDMA4_HWIP, +	SDMA5_HWIP, +	SDMA6_HWIP, +	SDMA7_HWIP,  	MMHUB_HWIP,  	ATHUB_HWIP,  	NBIO_HWIP, @@ -728,10 +757,12 @@ enum amd_hw_ip_block_type {  	NBIF_HWIP,  	THM_HWIP,  	CLK_HWIP, +	UMC_HWIP, +	RSMU_HWIP,  	MAX_HWIP  }; -#define HWIP_MAX_INSTANCE	6 +#define HWIP_MAX_INSTANCE	8  struct amd_powerplay {  	void *pp_handle; @@ -758,7 +789,6 @@ struct amdgpu_device {  	int				usec_timeout;  	const struct amdgpu_asic_funcs	*asic_funcs;  	bool				shutdown; -	bool				need_dma32;  	bool				need_swiotlb;  	bool				accel_working;  	struct notifier_block		acpi_nb; @@ -803,6 +833,8 @@ struct amdgpu_device {  	amdgpu_wreg_t			pcie_wreg;  	amdgpu_rreg_t			pciep_rreg;  	amdgpu_wreg_t			pciep_wreg; +	amdgpu_rreg64_t			pcie_rreg64; +	amdgpu_wreg64_t			pcie_wreg64;  	/* protects concurrent UVD register access */  	spinlock_t uvd_ctx_idx_lock;  	amdgpu_rreg_t			uvd_ctx_rreg; @@ -836,6 +868,7 @@ struct amdgpu_device {  	dma_addr_t			dummy_page_addr;  	struct amdgpu_vm_manager	vm_manager;  	struct amdgpu_vmhub             vmhub[AMDGPU_MAX_VMHUBS]; +	unsigned			num_vmhubs;  	/* memory management */  	struct amdgpu_mman		mman; @@ -915,6 +948,9 @@ struct amdgpu_device {  	/* KFD */  	struct amdgpu_kfd_dev		kfd; +	/* UMC */ +	struct amdgpu_umc		umc; 
+  	/* display related functionality */  	struct amdgpu_display_manager dm; @@ -940,6 +976,7 @@ struct amdgpu_device {  	const struct amdgpu_nbio_funcs	*nbio_funcs;  	const struct amdgpu_df_funcs	*df_funcs; +	const struct amdgpu_mmhub_funcs	*mmhub_funcs;  	/* delayed work_func for deferring clockgating during resume */  	struct delayed_work     delayed_init_work; @@ -965,6 +1002,7 @@ struct amdgpu_device {  	/* record last mm index being written through WREG32*/  	unsigned long last_mm_index;  	bool                            in_gpu_reset; +	enum pp_mp1_state               mp1_state;  	struct mutex  lock_reset;  	struct amdgpu_doorbell_index doorbell_index; @@ -1033,6 +1071,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);  #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))  #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))  #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) +#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v))  #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg))  #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v))  #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1093,6 +1133,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);   */  #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))  #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) +#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev))  #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))  #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))  #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) @@ -1110,6 +1151,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);  #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))  #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))  #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter));  /* Common functions */  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 9fa4f25a3745..07eb29885372 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -87,7 +87,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)  	case CHIP_RAVEN:  		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();  		break; +	case CHIP_ARCTURUS: +		kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); +		break;  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();  		break;  	default: @@ -651,8 +656,12 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; -	if (adev->powerplay.pp_funcs && -	    adev->powerplay.pp_funcs->switch_power_profile) +	if (is_support_sw_smu(adev)) +		smu_switch_power_profile(&adev->smu, +					 PP_SMC_POWER_PROFILE_COMPUTE, +					 !idle); +	else if (adev->powerplay.pp_funcs && +		 adev->powerplay.pp_funcs->switch_power_profile)  		amdgpu_dpm_switch_power_profile(adev,  						PP_SMC_POWER_PROFILE_COMPUTE,  						!idle); @@ 
-715,6 +724,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)  	return NULL;  } +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ +	return NULL; +} +  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)  {  	return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b6076d19e442..e519df3fd2b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -140,6 +140,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);  bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c new file mode 100644 index 000000000000..c79aaebeeaf0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -0,0 +1,323 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/mmu_context.h> +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "sdma0/sdma0_4_2_2_offset.h" +#include "sdma0/sdma0_4_2_2_sh_mask.h" +#include "sdma1/sdma1_4_2_2_offset.h" +#include "sdma1/sdma1_4_2_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_amdkfd_gfx_v9.h" + +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do {				\ +		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\ +			break;				\ +		(*dump)[i][0] = (addr) << 2;		\ +		(*dump)[i++][1] = RREG32(addr);		\ +	} while (0) + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ +	return (struct amdgpu_device *)kgd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ +	return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, +				unsigned int engine_id, +				unsigned int queue_id) +{ +	uint32_t base[8] = { +		SOC15_REG_OFFSET(SDMA0, 0, +				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA1, 0, +				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA2, 0, +				 mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA3, 0, +				 mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA4, 0, +				 mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA5, 0, +				 mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA6, 0, +				 mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, +		SOC15_REG_OFFSET(SDMA7, 0, +				 mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL +	}; +	uint32_t retval; + +	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - +					       mmSDMA0_RLC0_RB_CNTL); + +	pr_debug("sdma base address: 0x%x\n", retval); + +	return retval; +} + +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, +		u32 instance, u32 offset) +{ +	switch (instance) { +	case 0: +		return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); +	case 1: +		return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); +	case 2: +		return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); +	case 3: +		return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); +	case 4: +		return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); +	case 5: +		return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); +	case 6: +		return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); +	case 7: +		return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); +	default: +		break; +	} +	return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +			     uint32_t __user *wptr, struct mm_struct *mm) +{ +	struct amdgpu_device *adev = get_amdgpu_device(kgd); +	struct v9_sdma_mqd *m; +	uint32_t sdma_base_addr, sdmax_gfx_context_cntl; +	unsigned long end_jiffies; +	uint32_t data; +	uint64_t data64; +	uint64_t __user *wptr64 = (uint64_t __user *)wptr; + +	m = get_sdma_mqd(mqd); +	sdma_base_addr = get_sdma_base_addr(adev, 
m->sdma_engine_id, +					    m->sdma_queue_id); +	sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, +			m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); + +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, +		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + +	end_jiffies = msecs_to_jiffies(2000) + jiffies; +	while (true) { +		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); +		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) +			break; +		if (time_after(jiffies, end_jiffies)) +			return -ETIME; +		usleep_range(500, 1000); +	} +	data = RREG32(sdmax_gfx_context_cntl); +	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, +			     RESUME_CTX, 0); +	WREG32(sdmax_gfx_context_cntl, data); + +	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, +	       m->sdmax_rlcx_doorbell_offset); + +	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, +			     ENABLE, 1); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, +				m->sdmax_rlcx_rb_rptr_hi); + +	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); +	if (read_user_wptr(mm, wptr64, data64)) { +		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, +		       lower_32_bits(data64)); +		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, +		       upper_32_bits(data64)); +	} else { +		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, +		       m->sdmax_rlcx_rb_rptr); +		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, +		       m->sdmax_rlcx_rb_rptr_hi); +	} +	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, +			m->sdmax_rlcx_rb_base_hi); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, +			m->sdmax_rlcx_rb_rptr_addr_lo); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, +			m->sdmax_rlcx_rb_rptr_addr_hi); + +	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, +			     RB_ENABLE, 1); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + +	return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +			     uint32_t engine_id, uint32_t queue_id, +			     uint32_t (**dump)[2], uint32_t *n_regs) +{ +	struct amdgpu_device *adev = get_amdgpu_device(kgd); +	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); +	uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + +	*dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); +	if (*dump == NULL) +		return -ENOMEM; + +	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) +		DUMP_REG(sdma_base_addr + reg); +	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) +		DUMP_REG(sdma_base_addr + reg); +	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; +	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) +		DUMP_REG(sdma_base_addr + reg); +	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; +	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) +		DUMP_REG(sdma_base_addr + reg); + +	WARN_ON_ONCE(i != HQD_N_REGS); +	*n_regs = i; + +	return 0; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ +	struct amdgpu_device *adev = get_amdgpu_device(kgd); +	struct v9_sdma_mqd *m; +	uint32_t sdma_base_addr; +	uint32_t sdma_rlc_rb_cntl; + +	m = get_sdma_mqd(mqd); +	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, +					    m->sdma_queue_id); + +	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); 
+ +	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) +		return true; + +	return false; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +				unsigned int utimeout) +{ +	struct amdgpu_device *adev = get_amdgpu_device(kgd); +	struct v9_sdma_mqd *m; +	uint32_t sdma_base_addr; +	uint32_t temp; +	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + +	m = get_sdma_mqd(mqd); +	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, +					    m->sdma_queue_id); + +	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); +	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + +	while (true) { +		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); +		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) +			break; +		if (time_after(jiffies, end_jiffies)) +			return -ETIME; +		usleep_range(500, 1000); +	} + +	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); +	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, +		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | +		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + +	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); +	m->sdmax_rlcx_rb_rptr_hi = +		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + +	return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { +	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, +	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, +	.init_interrupts = kgd_gfx_v9_init_interrupts, +	.hqd_load = kgd_gfx_v9_hqd_load, +	.hqd_sdma_load = kgd_hqd_sdma_load, +	.hqd_dump = kgd_gfx_v9_hqd_dump, +	.hqd_sdma_dump = kgd_hqd_sdma_dump, +	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, +	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, +	.hqd_destroy = kgd_gfx_v9_hqd_destroy, +	.hqd_sdma_destroy = kgd_hqd_sdma_destroy, +	.address_watch_disable = kgd_gfx_v9_address_watch_disable, +	.address_watch_execute = kgd_gfx_v9_address_watch_execute, +	.wave_control_execute = kgd_gfx_v9_wave_control_execute, +	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, +	.get_atc_vmid_pasid_mapping_pasid = +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, +	.get_atc_vmid_pasid_mapping_valid = +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, +	.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, +	.get_tile_config = kgd_gfx_v9_get_tile_config, +	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, +	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, +	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, +	.get_hive_id = amdgpu_amdkfd_get_hive_id, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ +	return (struct kfd2kgd_calls *)&kfd2kgd; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 0723f800e815..d10f483f5e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -27,7 +27,6 @@  #include <linux/uaccess.h>  #include <linux/firmware.h>  #include <linux/mmu_context.h> -#include <drm/drmP.h>  #include "amdgpu.h"  #include "amdgpu_amdkfd.h"  #include "amdgpu_ucode.h" @@ -802,42 +801,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,  	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;  } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ -	struct amdgpu_device *adev = (struct amdgpu_device *) kgd; -	uint32_t req = (1 << vmid) | -		(0 << 
GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */ -		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK | -		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK | -		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK | -		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK | -		GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK; - -	mutex_lock(&adev->srbm_mutex); - -	/* Use light weight invalidation. -	 * -	 * TODO 1: agree on the right set of invalidation registers for -	 * KFD use. Use the last one for now. Invalidate only GCHUB as -	 * SDMA is now moved to GCHUB -	 * -	 * TODO 2: support range-based invalidation, requires kfg2kgd -	 * interface change -	 */ -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32), -				0xffffffff); -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32), -				0x0000001f); - -	WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req); - -	while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) & -					(1 << vmid))) -		cpu_relax(); - -	mutex_unlock(&adev->srbm_mutex); -} -  static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)  {  	signed long r; @@ -878,7 +841,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {  			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)  				== pasid) { -				write_vmid_invalidate_request(kgd, vmid); +				amdgpu_gmc_flush_gpu_tlb(adev, vmid, +						AMDGPU_GFXHUB_0, 0);  				break;  			}  		} @@ -896,7 +860,7 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  		return 0;  	} -	write_vmid_invalidate_request(kgd, vmid); +	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 85395f2d83a6..e262f2ac07a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -47,6 +47,7 @@  #include "soc15d.h"  #include "mmhub_v1_0.h"  #include "gfxhub_v1_0.h" +#include "gmc_v9_0.h"  #define V9_PIPE_PER_MEC		(4) @@ -58,66 +59,11 @@ enum hqd_dequeue_request_type {  	RESET_WAVES  }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, -		uint32_t sh_mem_config, -		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, -		uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, -		unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, -			uint32_t queue_id, uint32_t __user *wptr, -			uint32_t wptr_shift, uint32_t wptr_mask, -			struct mm_struct *mm); -static int kgd_hqd_dump(struct kgd_dev *kgd, -			uint32_t pipe_id, uint32_t queue_id, -			uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, -			     uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, -			     uint32_t engine_id, uint32_t queue_id, -			     uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, -		uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, -				enum kfd_preempt_type reset_type, -				unsigned int utimeout, uint32_t 
pipe_id, -				uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, -				unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, -					unsigned int watch_point_id, -					uint32_t cntl_val, -					uint32_t addr_hi, -					uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, -					uint32_t gfx_index_val, -					uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, -					unsigned int watch_point_id, -					unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, -		uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, -		uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, -		uint64_t page_table_base); -static void set_scratch_backing_va(struct kgd_dev *kgd, -					uint64_t va, uint32_t vmid); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);  /* Because of REG_GET_FIELD() being used, we put this function in the   * asic specific file.   */ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,  		struct tile_config *config)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -135,39 +81,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,  	return 0;  } -static const struct kfd2kgd_calls kfd2kgd = { -	.program_sh_mem_settings = kgd_program_sh_mem_settings, -	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, -	.init_interrupts = kgd_init_interrupts, -	.hqd_load = kgd_hqd_load, -	.hqd_sdma_load = kgd_hqd_sdma_load, -	.hqd_dump = kgd_hqd_dump, -	.hqd_sdma_dump = kgd_hqd_sdma_dump, -	.hqd_is_occupied = kgd_hqd_is_occupied, -	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, -	.hqd_destroy = kgd_hqd_destroy, -	.hqd_sdma_destroy = kgd_hqd_sdma_destroy, -	.address_watch_disable = kgd_address_watch_disable, -	.address_watch_execute = kgd_address_watch_execute, -	.wave_control_execute = kgd_wave_control_execute, -	.address_watch_get_offset = kgd_address_watch_get_offset, -	.get_atc_vmid_pasid_mapping_pasid = -			get_atc_vmid_pasid_mapping_pasid, -	.get_atc_vmid_pasid_mapping_valid = -			get_atc_vmid_pasid_mapping_valid, -	.set_scratch_backing_va = set_scratch_backing_va, -	.get_tile_config = amdgpu_amdkfd_get_tile_config, -	.set_vm_context_page_table_base = set_vm_context_page_table_base, -	.invalidate_tlbs = invalidate_tlbs, -	.invalidate_tlbs_vmid = invalidate_tlbs_vmid, -	.get_hive_id = amdgpu_amdkfd_get_hive_id, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ -	return (struct kfd2kgd_calls *)&kfd2kgd; -} -  static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)  {  	return (struct amdgpu_device *)kgd; @@ -215,7 +128,7 @@ static void release_queue(struct kgd_dev *kgd)  	unlock_srbm(kgd);  } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,  					uint32_t sh_mem_config,  					uint32_t sh_mem_ape1_base,  					uint32_t sh_mem_ape1_limit, @@ -232,7 +145,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,  	unlock_srbm(kgd);  } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned 
int pasid,  					unsigned int vmid)  {  	struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -293,7 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,   * but still works   */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)  {  	struct amdgpu_device *adev = get_amdgpu_device(kgd);  	uint32_t mec; @@ -343,7 +256,7 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)  	return (struct v9_sdma_mqd *)mqd;  } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,  			uint32_t queue_id, uint32_t __user *wptr,  			uint32_t wptr_shift, uint32_t wptr_mask,  			struct mm_struct *mm) @@ -438,7 +351,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,  	return 0;  } -static int kgd_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd,  			uint32_t pipe_id, uint32_t queue_id,  			uint32_t (**dump)[2], uint32_t *n_regs)  { @@ -575,7 +488,7 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,  	return 0;  } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,  				uint32_t pipe_id, uint32_t queue_id)  {  	struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -616,7 +529,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)  	return false;  } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,  				enum kfd_preempt_type reset_type,  				unsigned int utimeout, uint32_t pipe_id,  				uint32_t queue_id) @@ -704,7 +617,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,  	return 0;  } -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,  							uint8_t vmid)  {  	uint32_t reg; @@ -715,7 +628,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,  	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;  } -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,  								uint8_t vmid)  {  	uint32_t reg; @@ -754,10 +667,10 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,  	return 0;  } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  {  	struct amdgpu_device *adev = (struct amdgpu_device *) kgd; -	int vmid; +	int vmid, i;  	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;  	uint32_t flush_type = 0; @@ -773,11 +686,12 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	for (vmid = 0; vmid < 16; vmid++) {  		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))  			continue; -		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { -			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) +		if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { +			if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)  				== pasid) { -				amdgpu_gmc_flush_gpu_tlb(adev, vmid, -							 flush_type); +				for (i = 0; i < adev->num_vmhubs; i++) +					amdgpu_gmc_flush_gpu_tlb(adev, vmid, +								i, flush_type);  				break;  			}  		} @@ -786,9 +700,10 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	return 0;  } -static 
int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  {  	struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +	int i;  	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {  		pr_err("non kfd vmid %d\n", vmid); @@ -810,16 +725,18 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  	 * TODO 2: support range-based invalidation, requires kfg2kgd  	 * interface change  	 */ -	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); +	for (i = 0; i < adev->num_vmhubs; i++) +		amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); +  	return 0;  } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd)  {  	return 0;  } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd,  					unsigned int watch_point_id,  					uint32_t cntl_val,  					uint32_t addr_hi, @@ -828,7 +745,7 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd,  	return 0;  } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd,  					uint32_t gfx_index_val,  					uint32_t sq_cmd)  { @@ -853,14 +770,14 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,  	return 0;  } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,  					unsigned int watch_point_id,  					unsigned int reg_offset)  {  	return 0;  } -static void set_scratch_backing_va(struct kgd_dev *kgd, +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd,  					uint64_t va, uint32_t vmid)  {  	/* No longer needed on GFXv9. The scratch base address is @@ -869,7 +786,7 @@ static void set_scratch_backing_va(struct kgd_dev *kgd,  	 */  } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,  		uint64_t page_table_base)  {  	struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -884,7 +801,45 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,  	 * now, all processes share the same address space size, like  	 * on GFX8 and older.  	 
*/ -	mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); +	if (adev->asic_type == CHIP_ARCTURUS) { +		/* Two MMHUBs */ +		mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); +		mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); +	} else +		mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);  	gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);  } + +static const struct kfd2kgd_calls kfd2kgd = { +	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, +	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, +	.init_interrupts = kgd_gfx_v9_init_interrupts, +	.hqd_load = kgd_gfx_v9_hqd_load, +	.hqd_sdma_load = kgd_hqd_sdma_load, +	.hqd_dump = kgd_gfx_v9_hqd_dump, +	.hqd_sdma_dump = kgd_hqd_sdma_dump, +	.hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, +	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, +	.hqd_destroy = kgd_gfx_v9_hqd_destroy, +	.hqd_sdma_destroy = kgd_hqd_sdma_destroy, +	.address_watch_disable = kgd_gfx_v9_address_watch_disable, +	.address_watch_execute = kgd_gfx_v9_address_watch_execute, +	.wave_control_execute = kgd_gfx_v9_wave_control_execute, +	.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, +	.get_atc_vmid_pasid_mapping_pasid = +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, +	.get_atc_vmid_pasid_mapping_valid = +			kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, +	.set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, +	.get_tile_config = kgd_gfx_v9_get_tile_config, +	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, +	.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, +	.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, +	.get_hive_id = amdgpu_amdkfd_get_hive_id, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ +	return (struct kfd2kgd_calls *)&kfd2kgd; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h new file mode 100644 index 000000000000..26d8879bff9d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -0,0 +1,69 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + + +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +		uint32_t sh_mem_config, +		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, +		uint32_t sh_mem_bases); +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +		unsigned int vmid); +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +			uint32_t queue_id, uint32_t __user *wptr, +			uint32_t wptr_shift, uint32_t wptr_mask, +			struct mm_struct *mm); +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +			uint32_t pipe_id, uint32_t queue_id, +			uint32_t (**dump)[2], uint32_t *n_regs); +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +		uint32_t pipe_id, uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +				enum kfd_preempt_type reset_type, +				unsigned int utimeout, uint32_t pipe_id, +				uint32_t queue_id); +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +					unsigned int watch_point_id, +					uint32_t cntl_val, +					uint32_t addr_hi, +					uint32_t addr_lo); +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +					uint32_t gfx_index_val, +					uint32_t sq_cmd); +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +					unsigned int watch_point_id, +					unsigned int reg_offset); + +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, +		uint8_t vmid); +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, +		uint8_t vmid); +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +		uint64_t page_table_base); +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, +					uint64_t va, uint32_t vmid); +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, +		struct tile_config *config); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d3ee9c42f7e..6d021ecc8d59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -218,14 +218,14 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)  static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,  					struct amdgpu_amdkfd_fence *ef)  { -	struct reservation_object *resv = bo->tbo.resv; -	struct reservation_object_list *old, *new; +	struct dma_resv *resv = bo->tbo.base.resv; +	struct dma_resv_list *old, *new;  	unsigned int i, j, k;  	if (!ef)  		return -EINVAL; -	old = reservation_object_get_list(resv); +	old = dma_resv_get_list(resv);  	if (!old)  		return 0; @@ -241,7 +241,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,  		struct dma_fence *f;  		f = rcu_dereference_protected(old->shared[i], -					      reservation_object_held(resv)); +					      dma_resv_held(resv));  		if (f->context == ef->base.context)  			RCU_INIT_POINTER(new->shared[--j], f); @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,  		struct dma_fence *f;  		f = rcu_dereference_protected(new->shared[i], -					      reservation_object_held(resv)); +					      dma_resv_held(resv));  		dma_fence_put(f);  	}  	kfree_rcu(old, rcu); @@ -812,7 +812,7 @@ static int process_sync_pds_resv(struct 
amdkfd_process_info *process_info,  		struct amdgpu_bo *pd = peer_vm->root.base.bo;  		ret = amdgpu_sync_resv(NULL, -					sync, pd->tbo.resv, +					sync, pd->tbo.base.resv,  					AMDGPU_FENCE_OWNER_KFD, false);  		if (ret)  			return ret; @@ -887,7 +887,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,  				  AMDGPU_FENCE_OWNER_KFD, false);  	if (ret)  		goto wait_pd_fail; -	ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); +	ret = dma_resv_reserve_shared(vm->root.base.bo->tbo.base.resv, 1);  	if (ret)  		goto reserve_shared_fail;  	amdgpu_bo_fence(vm->root.base.bo, @@ -1090,7 +1090,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  	 */  	if (flags & ALLOC_MEM_FLAGS_VRAM) {  		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; -		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; +		alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;  		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?  			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :  			AMDGPU_GEM_CREATE_NO_CPU_ACCESS; @@ -1103,7 +1103,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  		alloc_flags = 0;  		if (!offset || !*offset)  			return -EINVAL; -		user_addr = *offset; +		user_addr = untagged_addr(*offset);  	} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |  			ALLOC_MEM_FLAGS_MMIO_REMAP)) {  		domain = AMDGPU_GEM_DOMAIN_GTT; @@ -1140,7 +1140,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  			adev->asic_type != CHIP_FIJI &&  			adev->asic_type != CHIP_POLARIS10 &&  			adev->asic_type != CHIP_POLARIS11 && -			adev->asic_type != CHIP_POLARIS12) ? +			adev->asic_type != CHIP_POLARIS12 && +			adev->asic_type != CHIP_VEGAM) ?  			VI_BO_SIZE_ALIGN : 1;  	mapping_flags = AMDGPU_VM_PAGE_READABLE; @@ -2132,7 +2133,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem  	 * Add process eviction fence to bo so they can  	 * evict each other.  	 
*/ -	ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1); +	ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);  	if (ret)  		goto reserve_shared_fail;  	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 9b384a94d2f3..3e35a8f2c5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -574,6 +574,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {  	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },  	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },  	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, +	{ 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX },  	{ 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },  	{ 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },  	{ 0, 0, 0, 0, 0 }, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 73b2ede773d3..ece55c8fa673 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1505,6 +1505,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,  	struct amdgpu_connector_atom_dig *amdgpu_dig_connector;  	struct drm_encoder *encoder;  	struct amdgpu_encoder *amdgpu_encoder; +	struct i2c_adapter *ddc = NULL;  	uint32_t subpixel_order = SubPixelNone;  	bool shared_ddc = false;  	bool is_dp_bridge = false; @@ -1574,17 +1575,21 @@ amdgpu_connector_add(struct amdgpu_device *adev,  		amdgpu_connector->con_priv = amdgpu_dig_connector;  		if (i2c_bus->valid) {  			amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); -			if (amdgpu_connector->ddc_bus) +			if (amdgpu_connector->ddc_bus) {  				has_aux = true; -			else +				ddc = &amdgpu_connector->ddc_bus->adapter; +			} else {  				DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); +			}  		}  		switch (connector_type) {  		case DRM_MODE_CONNECTOR_VGA:  		case DRM_MODE_CONNECTOR_DVIA:  		default: -			drm_connector_init(dev, &amdgpu_connector->base, -					   &amdgpu_connector_dp_funcs, connector_type); +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_dp_funcs, +						    connector_type, +						    ddc);  			drm_connector_helper_add(&amdgpu_connector->base,  						 &amdgpu_connector_dp_helper_funcs);  			connector->interlace_allowed = true; @@ -1602,8 +1607,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,  		case DRM_MODE_CONNECTOR_HDMIA:  		case DRM_MODE_CONNECTOR_HDMIB:  		case DRM_MODE_CONNECTOR_DisplayPort: -			drm_connector_init(dev, &amdgpu_connector->base, -					   &amdgpu_connector_dp_funcs, connector_type); +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_dp_funcs, +						    connector_type, +						    ddc);  			drm_connector_helper_add(&amdgpu_connector->base,  						 &amdgpu_connector_dp_helper_funcs);  			drm_object_attach_property(&amdgpu_connector->base.base, @@ -1644,8 +1651,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			break;  		case DRM_MODE_CONNECTOR_LVDS:  		case DRM_MODE_CONNECTOR_eDP: -			drm_connector_init(dev, &amdgpu_connector->base, -					   &amdgpu_connector_edp_funcs, connector_type); +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_edp_funcs, +						    connector_type, +						    ddc);  			drm_connector_helper_add(&amdgpu_connector->base,  						 &amdgpu_connector_dp_helper_funcs);  			drm_object_attach_property(&amdgpu_connector->base.base, @@ -1659,13 +1668,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,  	} else {  		switch (connector_type) {  		case DRM_MODE_CONNECTOR_VGA: -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);  				if (!amdgpu_connector->ddc_bus)  					DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); +				else +					ddc = &amdgpu_connector->ddc_bus->adapter;  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_vga_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);  			amdgpu_connector->dac_load_detect = true;  			drm_object_attach_property(&amdgpu_connector->base.base,  						      adev->mode_info.load_detect_property, @@ -1679,13 +1693,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			connector->doublescan_allowed = true;  			break;  		case DRM_MODE_CONNECTOR_DVIA: -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);  				if (!amdgpu_connector->ddc_bus)  					DRM_ERROR("DVIA: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); +				else +					ddc = &amdgpu_connector->ddc_bus->adapter;  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_vga_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs);  			amdgpu_connector->dac_load_detect = true;  			drm_object_attach_property(&amdgpu_connector->base.base,  						      adev->mode_info.load_detect_property, @@ -1704,13 +1723,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			if (!amdgpu_dig_connector)  				goto failed;  			amdgpu_connector->con_priv = amdgpu_dig_connector; -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);  				if (!amdgpu_connector->ddc_bus)  					DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); +				else +					ddc = &amdgpu_connector->ddc_bus->adapter;  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_dvi_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);  			subpixel_order = SubPixelHorizontalRGB;  			drm_object_attach_property(&amdgpu_connector->base.base,  						      adev->mode_info.coherent_mode_property, @@ -1754,13 +1778,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			if (!amdgpu_dig_connector)  				goto failed;  			amdgpu_connector->con_priv = amdgpu_dig_connector; -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);  				if (!amdgpu_connector->ddc_bus)  					DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); +				else +					ddc = &amdgpu_connector->ddc_bus->adapter;  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_dvi_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs);  			drm_object_attach_property(&amdgpu_connector->base.base,  						      adev->mode_info.coherent_mode_property,  						      1); @@ -1796,15 +1825,20 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			if (!amdgpu_dig_connector)  				goto failed;  			amdgpu_connector->con_priv = amdgpu_dig_connector; -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); -				if (amdgpu_connector->ddc_bus) +				if (amdgpu_connector->ddc_bus) {  					has_aux = true; -				else +					ddc = &amdgpu_connector->ddc_bus->adapter; +				} else {  					DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); +				}  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_dp_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);  			subpixel_order = SubPixelHorizontalRGB;  			drm_object_attach_property(&amdgpu_connector->base.base,  						      adev->mode_info.coherent_mode_property, @@ -1838,15 +1872,20 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			if (!amdgpu_dig_connector)  				goto failed;  			amdgpu_connector->con_priv = amdgpu_dig_connector; -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); -				if (amdgpu_connector->ddc_bus) +				if (amdgpu_connector->ddc_bus) {  					has_aux = true; -				else +					ddc = &amdgpu_connector->ddc_bus->adapter; +				} else {  					DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); +				}  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_edp_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs);  			drm_object_attach_property(&amdgpu_connector->base.base,  						      dev->mode_config.scaling_mode_property,  						      DRM_MODE_SCALE_FULLSCREEN); @@ -1859,13 +1898,18 @@ amdgpu_connector_add(struct amdgpu_device *adev,  			if (!amdgpu_dig_connector)  				goto failed;  			amdgpu_connector->con_priv = amdgpu_dig_connector; -			drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type); -			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);  			if (i2c_bus->valid) {  				amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus);  				if (!amdgpu_connector->ddc_bus)  					DRM_ERROR("LVDS: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); +				else +					ddc = &amdgpu_connector->ddc_bus->adapter;  			} +			drm_connector_init_with_ddc(dev, &amdgpu_connector->base, +						    &amdgpu_connector_lvds_funcs, +						    connector_type, +						    ddc); +			drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs);  			drm_object_attach_property(&amdgpu_connector->base.base,  						      dev->mode_config.scaling_mode_property,  						      DRM_MODE_SCALE_FULLSCREEN); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e069de8b54e6..2e53feed40e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -402,7 +402,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,  	struct ttm_operation_ctx ctx = {  		.interruptible = true,  		.no_wait_gpu = false, -		.resv = bo->tbo.resv, +		.resv = bo->tbo.base.resv,  		.flags = 0  	};  	uint32_t domain; @@ -730,7 +730,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)  	list_for_each_entry(e, &p->validated, tv.head) {  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); -		struct reservation_object *resv = bo->tbo.resv; +		struct dma_resv *resv = bo->tbo.base.resv;  		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,  				     amdgpu_bo_explicit_sync(bo)); @@ -1044,29 +1044,27 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,  			return r;  		} -		fence = amdgpu_ctx_get_fence(ctx, entity, -					     deps[i].handle); +		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); +		amdgpu_ctx_put(ctx); + +		if (IS_ERR(fence)) +			return PTR_ERR(fence); +		else if (!fence) +			continue;  		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { -			struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); +			struct drm_sched_fence *s_fence;  			struct dma_fence *old = fence; +			s_fence = to_drm_sched_fence(fence);  			fence = dma_fence_get(&s_fence->scheduled);  			dma_fence_put(old);  		} -		if (IS_ERR(fence)) { -			r = PTR_ERR(fence); -			amdgpu_ctx_put(ctx); +		r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); +		dma_fence_put(fence); +		if (r)  			return r; -		} else if (fence) { -			r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, -					true); -			dma_fence_put(fence); -			amdgpu_ctx_put(ctx); -			if (r) -				return r; -		}  	}  	return 0;  } @@ -1145,6 +1143,9 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,  	num_deps = chunk->length_dw * 4 /  		sizeof(struct drm_amdgpu_cs_chunk_sem); +	if (p->post_deps) +		return -EINVAL; +  	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),  				     GFP_KERNEL);  	p->num_post_deps = 0; @@ -1168,8 +1169,7 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,  static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, -						      struct amdgpu_cs_chunk -						      *chunk) +						      struct amdgpu_cs_chunk *chunk)  {  	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;  	unsigned num_deps; @@ -1179,6 +1179,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p  	num_deps = chunk->length_dw * 4 /  		sizeof(struct drm_amdgpu_cs_chunk_syncobj); +	if (p->post_deps) +		return -EINVAL; +  	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),  				     GFP_KERNEL);  	p->num_post_deps = 0; @@ -1729,7 +1732,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,  	*map = mapping;  	/* Double check 
that the BO is reserved by this CS */ -	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) +	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)  		return -EINVAL;  	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f539a2a92774..6614d8a6f4c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -42,7 +42,7 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {  	[AMDGPU_HW_IP_VCN_JPEG]	=	1,  }; -static int amdgput_ctx_total_num_entities(void) +static int amdgpu_ctx_total_num_entities(void)  {  	unsigned i, num_entities = 0; @@ -73,8 +73,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,  			   struct drm_file *filp,  			   struct amdgpu_ctx *ctx)  { -	unsigned num_entities = amdgput_ctx_total_num_entities(); -	unsigned i, j; +	unsigned num_entities = amdgpu_ctx_total_num_entities(); +	unsigned i, j, k;  	int r;  	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) @@ -123,7 +123,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {  		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];  		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; -		unsigned num_rings; +		unsigned num_rings = 0;  		unsigned num_rqs = 0;  		switch (i) { @@ -154,16 +154,26 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,  			num_rings = 1;  			break;  		case AMDGPU_HW_IP_VCN_DEC: -			rings[0] = &adev->vcn.ring_dec; -			num_rings = 1; +			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +				if (adev->vcn.harvest_config & (1 << j)) +					continue; +				rings[num_rings++] = &adev->vcn.inst[j].ring_dec; +			}  			break;  		case AMDGPU_HW_IP_VCN_ENC: -			rings[0] = &adev->vcn.ring_enc[0]; -			num_rings = 1; +			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +				if (adev->vcn.harvest_config & (1 << j)) +					continue; +				for (k = 0; k < adev->vcn.num_enc_rings; ++k) +					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k]; +			}  			break;  		case AMDGPU_HW_IP_VCN_JPEG: -			rings[0] = &adev->vcn.ring_jpeg; -			num_rings = 1; +			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +				if (adev->vcn.harvest_config & (1 << j)) +					continue; +				rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg; +			}  			break;  		} @@ -197,7 +207,7 @@ error_free_fences:  static void amdgpu_ctx_fini(struct kref *ref)  {  	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); -	unsigned num_entities = amdgput_ctx_total_num_entities(); +	unsigned num_entities = amdgpu_ctx_total_num_entities();  	struct amdgpu_device *adev = ctx->adev;  	unsigned i, j; @@ -279,10 +289,7 @@ static void amdgpu_ctx_do_release(struct kref *ref)  	ctx = container_of(ref, struct amdgpu_ctx, refcount); -	num_entities = 0; -	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) -		num_entities += amdgpu_ctx_num_entities[i]; - +	num_entities = amdgpu_ctx_total_num_entities();  	for (i = 0; i < num_entities; i++)  		drm_sched_entity_destroy(&ctx->entities[0][i].entity); @@ -344,7 +351,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,  {  	struct amdgpu_ctx *ctx;  	struct amdgpu_ctx_mgr *mgr; -	uint32_t ras_counter; +	unsigned long ras_counter;  	if (!fpriv)  		return -EINVAL; @@ -514,7 +521,7 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,  				  enum drm_sched_priority priority)  { -	unsigned num_entities = 
amdgput_ctx_total_num_entities(); +	unsigned num_entities = amdgpu_ctx_total_num_entities();  	enum drm_sched_priority ctx_prio;  	unsigned i; @@ -534,21 +541,24 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,  			       struct drm_sched_entity *entity)  {  	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); -	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); -	struct dma_fence *other = centity->fences[idx]; +	struct dma_fence *other; +	unsigned idx; +	long r; -	if (other) { -		signed long r; -		r = dma_fence_wait(other, true); -		if (r < 0) { -			if (r != -ERESTARTSYS) -				DRM_ERROR("Error (%ld) waiting for fence!\n", r); +	spin_lock(&ctx->ring_lock); +	idx = centity->sequence & (amdgpu_sched_jobs - 1); +	other = dma_fence_get(centity->fences[idx]); +	spin_unlock(&ctx->ring_lock); -			return r; -		} -	} +	if (!other) +		return 0; -	return 0; +	r = dma_fence_wait(other, true); +	if (r < 0 && r != -ERESTARTSYS) +		DRM_ERROR("Error (%ld) waiting for fence!\n", r); + +	dma_fence_put(other); +	return r;  }  void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) @@ -559,7 +569,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)  { -	unsigned num_entities = amdgput_ctx_total_num_entities(); +	unsigned num_entities = amdgpu_ctx_total_num_entities();  	struct amdgpu_ctx *ctx;  	struct idr *idp;  	uint32_t id, i; @@ -581,7 +591,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)  void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)  { -	unsigned num_entities = amdgput_ctx_total_num_entities(); +	unsigned num_entities = amdgpu_ctx_total_num_entities();  	struct amdgpu_ctx *ctx;  	struct idr *idp;  	uint32_t id, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 5f1b54c9bcdb..da808633732b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -49,8 +49,8 @@ struct amdgpu_ctx {  	enum drm_sched_priority		override_priority;  	struct mutex			lock;  	atomic_t			guilty; -	uint32_t			ras_counter_ce; -	uint32_t			ras_counter_ue; +	unsigned long			ras_counter_ce; +	unsigned long			ras_counter_ue;  };  struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6d54decef7f8..5652cc72ed3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -707,7 +707,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,  	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;  	bank = (*pos & GENMASK_ULL(61, 60)) >> 60; -	data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); +	data = kcalloc(1024, sizeof(*data), GFP_KERNEL);  	if (!data)  		return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5a7f893cf724..5a1939dbd4e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -70,7 +70,11 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");  
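/*
 * Editorial sketch, not part of the patch: the MODULE_FIRMWARE() entries
 * added above all follow the "amdgpu/<chip>_gpu_info.bin" naming scheme
 * that amdgpu_device_parse_gpu_info_fw() builds with snprintf() further
 * down in this file.  The stand-alone harness below only demonstrates that
 * string handling; the chip list is trimmed and the buffer size is arbitrary.
 */
#include <stdio.h>

int main(void)
{
	static const char *const chips[] = { "arcturus", "renoir", "navi14", "navi12" };
	char fw_name[64];

	for (size_t i = 0; i < sizeof(chips) / sizeof(chips[0]); i++) {
		/* Same format string the driver hands to request_firmware(). */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chips[i]);
		printf("%s\n", fw_name);
	}
	return 0;
}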
#define AMDGPU_RESUME_MS		2000 @@ -98,7 +102,11 @@ static const char *amdgpu_asic_name[] = {  	"VEGA12",  	"VEGA20",  	"RAVEN", +	"ARCTURUS", +	"RENOIR",  	"NAVI10", +	"NAVI14", +	"NAVI12",  	"LAST",  }; @@ -413,6 +421,40 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32  }  /** + * amdgpu_invalid_rreg64 - dummy 64 bit reg read function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * + * Dummy register read function.  Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. + */ +static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) +{ +	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); +	BUG(); +	return 0; +} + +/** + * amdgpu_invalid_wreg64 - dummy reg write function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register read function.  Used for register blocks + * that certain asics don't have (all asics). + */ +static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) +{ +	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", +		  reg, v); +	BUG(); +} + +/**   * amdgpu_block_invalid_rreg - dummy reg read function   *   * @adev: amdgpu device pointer @@ -1384,9 +1426,21 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)  		else  			chip_name = "raven";  		break; +	case CHIP_ARCTURUS: +		chip_name = "arcturus"; +		break; +	case CHIP_RENOIR: +		chip_name = "renoir"; +		break;  	case CHIP_NAVI10:  		chip_name = "navi10";  		break; +	case CHIP_NAVI14: +		chip_name = "navi14"; +		break; +	case CHIP_NAVI12: +		chip_name = "navi12"; +		break;  	}  	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1529,7 +1583,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: -		if (adev->asic_type == CHIP_RAVEN) +	case CHIP_ARCTURUS: +	case CHIP_RENOIR: +		if (adev->asic_type == CHIP_RAVEN || +		    adev->asic_type == CHIP_RENOIR)  			adev->family = AMDGPU_FAMILY_RV;  		else  			adev->family = AMDGPU_FAMILY_AI; @@ -1539,6 +1596,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  			return r;  		break;  	case  CHIP_NAVI10: +	case  CHIP_NAVI14: +	case  CHIP_NAVI12:  		adev->family = AMDGPU_FAMILY_NV;  		r = nv_set_ip_blocks(adev); @@ -1560,9 +1619,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  		r = amdgpu_virt_request_full_gpu(adev, true);  		if (r)  			return -EAGAIN; - -		/* query the reg access mode at the very beginning */ -		amdgpu_virt_init_reg_access_mode(adev);  	}  	adev->pm.pp_feature = amdgpu_pp_feature_mask; @@ -1665,28 +1721,34 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)  	if (adev->asic_type >= CHIP_VEGA10) {  		for (i = 0; i < adev->num_ip_blocks; i++) { -			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { -				if (adev->in_gpu_reset || adev->in_suspend) { -					if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) -						break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ -					r = adev->ip_blocks[i].version->funcs->resume(adev); -					if (r) { -						DRM_ERROR("resume of IP block <%s> failed %d\n", +			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) +				continue; + +			/* no need to do the fw loading again if already done*/ +			if (adev->ip_blocks[i].status.hw 
== true) +				break; + +			if (adev->in_gpu_reset || adev->in_suspend) { +				r = adev->ip_blocks[i].version->funcs->resume(adev); +				if (r) { +					DRM_ERROR("resume of IP block <%s> failed %d\n",  							  adev->ip_blocks[i].version->funcs->name, r); -						return r; -					} -				} else { -					r = adev->ip_blocks[i].version->funcs->hw_init(adev); -					if (r) { -						DRM_ERROR("hw_init of IP block <%s> failed %d\n", -						  adev->ip_blocks[i].version->funcs->name, r); -						return r; -					} +					return r; +				} +			} else { +				r = adev->ip_blocks[i].version->funcs->hw_init(adev); +				if (r) { +					DRM_ERROR("hw_init of IP block <%s> failed %d\n", +							  adev->ip_blocks[i].version->funcs->name, r); +					return r;  				} -				adev->ip_blocks[i].status.hw = true;  			} + +			adev->ip_blocks[i].status.hw = true; +			break;  		}  	} +  	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);  	return r; @@ -2128,7 +2190,9 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)  			if (r) {  				DRM_ERROR("suspend of IP block <%s> failed %d\n",  					  adev->ip_blocks[i].version->funcs->name, r); +				return r;  			} +			adev->ip_blocks[i].status.hw = false;  		}  	} @@ -2163,6 +2227,25 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)  			DRM_ERROR("suspend of IP block <%s> failed %d\n",  				  adev->ip_blocks[i].version->funcs->name, r);  		} +		adev->ip_blocks[i].status.hw = false; +		/* handle putting the SMC in the appropriate state */ +		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { +			if (is_support_sw_smu(adev)) { +				/* todo */ +			} else if (adev->powerplay.pp_funcs && +					   adev->powerplay.pp_funcs->set_mp1_state) { +				r = adev->powerplay.pp_funcs->set_mp1_state( +					adev->powerplay.pp_handle, +					adev->mp1_state); +				if (r) { +					DRM_ERROR("SMC failed to set mp1 state %d, %d\n", +						  adev->mp1_state, r); +					return r; +				} +			} +		} + +		adev->ip_blocks[i].status.hw = false;  	}  	return 0; @@ -2215,6 +2298,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)  		for (j = 0; j < adev->num_ip_blocks; j++) {  			block = &adev->ip_blocks[j]; +			block->status.hw = false;  			if (block->version->type != ip_order[i] ||  				!block->status.valid)  				continue; @@ -2223,6 +2307,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)  			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");  			if (r)  				return r; +			block->status.hw = true;  		}  	} @@ -2250,13 +2335,15 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)  			block = &adev->ip_blocks[j];  			if (block->version->type != ip_order[i] || -				!block->status.valid) +				!block->status.valid || +				block->status.hw)  				continue;  			r = block->version->funcs->hw_init(adev);  			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");  			if (r)  				return r; +			block->status.hw = true;  		}  	} @@ -2280,17 +2367,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)  	int i, r;  	for (i = 0; i < adev->num_ip_blocks; i++) { -		if (!adev->ip_blocks[i].status.valid) +		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)  			continue;  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { +  			r = 
adev->ip_blocks[i].version->funcs->resume(adev);  			if (r) {  				DRM_ERROR("resume of IP block <%s> failed %d\n",  					  adev->ip_blocks[i].version->funcs->name, r);  				return r;  			} +			adev->ip_blocks[i].status.hw = true;  		}  	} @@ -2315,7 +2404,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)  	int i, r;  	for (i = 0; i < adev->num_ip_blocks; i++) { -		if (!adev->ip_blocks[i].status.valid) +		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)  			continue;  		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||  		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -2328,6 +2417,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)  				  adev->ip_blocks[i].version->funcs->name, r);  			return r;  		} +		adev->ip_blocks[i].status.hw = true;  	}  	return 0; @@ -2426,6 +2516,11 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)  #endif  #if defined(CONFIG_DRM_AMD_DC_DCN2_0)  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12: +#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) +	case CHIP_RENOIR:  #endif  		return amdgpu_dc != 0;  #endif @@ -2509,6 +2604,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	adev->pcie_wreg = &amdgpu_invalid_wreg;  	adev->pciep_rreg = &amdgpu_invalid_rreg;  	adev->pciep_wreg = &amdgpu_invalid_wreg; +	adev->pcie_rreg64 = &amdgpu_invalid_rreg64; +	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;  	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;  	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;  	adev->didt_rreg = &amdgpu_invalid_rreg; @@ -3389,7 +3486,7 @@ error:  	amdgpu_virt_init_data_exchange(adev);  	amdgpu_virt_release_full_gpu(adev, true);  	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { -		atomic_inc(&adev->vram_lost_counter); +		amdgpu_inc_vram_lost(adev);  		r = amdgpu_device_recover_vram(adev);  	} @@ -3431,6 +3528,7 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)  		case CHIP_VEGA20:  		case CHIP_VEGA10:  		case CHIP_VEGA12: +		case CHIP_RAVEN:  			break;  		default:  			goto disabled; @@ -3554,7 +3652,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,  				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);  				if (vram_lost) {  					DRM_INFO("VRAM is lost due to GPU reset!\n"); -					atomic_inc(&tmp_adev->vram_lost_counter); +					amdgpu_inc_vram_lost(tmp_adev);  				}  				r = amdgpu_gtt_mgr_recover( @@ -3627,6 +3725,17 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)  	atomic_inc(&adev->gpu_reset_counter);  	adev->in_gpu_reset = 1; +	switch (amdgpu_asic_reset_method(adev)) { +	case AMD_RESET_METHOD_MODE1: +		adev->mp1_state = PP_MP1_STATE_SHUTDOWN; +		break; +	case AMD_RESET_METHOD_MODE2: +		adev->mp1_state = PP_MP1_STATE_RESET; +		break; +	default: +		adev->mp1_state = PP_MP1_STATE_NONE; +		break; +	}  	/* Block kfd: SRIOV would do it separately */  	if (!amdgpu_sriov_vf(adev))                  amdgpu_amdkfd_pre_reset(adev); @@ -3640,6 +3749,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)  	if (!amdgpu_sriov_vf(adev))                  amdgpu_amdkfd_post_reset(adev);  	amdgpu_vf_error_trans_all(adev); +	adev->mp1_state = PP_MP1_STATE_NONE;  	adev->in_gpu_reset = 0;  	mutex_unlock(&adev->lock_reset);  } @@ -3684,14 +3794,14 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  	if (hive && !mutex_trylock(&hive->reset_lock)) {  		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in 
progress", -			 job->base.id, hive->hive_id); +			  job ? job->base.id : -1, hive->hive_id);  		return 0;  	}  	/* Start with adev pre asic reset first for soft reset check.*/  	if (!amdgpu_device_lock_adev(adev, !hive)) {  		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", -					 job->base.id); +			  job ? job->base.id : -1);  		return 0;  	} @@ -3732,7 +3842,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  			if (!ring || !ring->sched.thread)  				continue; -			drm_sched_stop(&ring->sched, &job->base); +			drm_sched_stop(&ring->sched, job ? &job->base : NULL);  		}  	} @@ -3757,9 +3867,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  	/* Guilty job will be freed after this*/ -	r = amdgpu_device_pre_asic_reset(adev, -					 job, -					 &need_full_reset); +	r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset);  	if (r) {  		/*TODO Should we stop ?*/  		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 535650967b1a..1d4aaa9580f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -191,7 +191,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,  	}  	if (!adev->enable_virtual_display) { -		r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); +		r = amdgpu_bo_pin(new_abo, +				  amdgpu_display_supported_domains(adev, new_abo->flags));  		if (unlikely(r != 0)) {  			DRM_ERROR("failed to pin new abo buffer before flip\n");  			goto unreserve; @@ -204,7 +205,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,  		goto unpin;  	} -	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl, +	r = dma_resv_get_fences_rcu(new_abo->tbo.base.resv, &work->excl,  					      &work->shared_count,  					      &work->shared);  	if (unlikely(r != 0)) { @@ -495,13 +496,25 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {  	.create_handle = drm_gem_fb_create_handle,  }; -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, +					  uint64_t bo_flags)  {  	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;  #if defined(CONFIG_DRM_AMD_DC) -	if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && -	    adev->flags & AMD_IS_APU && +	/* +	 * if amdgpu_bo_support_uswc returns false it means that USWC mappings +	 * is not supported for this board. But this mapping is required +	 * to avoid hang caused by placement of scanout BO in GTT on certain +	 * APUs. So force the BO placement to VRAM in case this architecture +	 * will not allow USWC mappings. +	 * Also, don't allow GTT domain if the BO doens't have USWC falg set. 
+	 */ +	if (adev->asic_type >= CHIP_CARRIZO && +	    adev->asic_type <= CHIP_RAVEN && +	    (adev->flags & AMD_IS_APU) && +	    (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && +	    amdgpu_bo_support_uswc(bo_flags) &&  	    amdgpu_device_asic_has_dc_support(adev->asic_type))  		domain |= AMDGPU_GEM_DOMAIN_GTT;  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 06b922fe0d42..3620b24785e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -38,7 +38,8 @@  int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,  				  struct drm_file *filp);  void amdgpu_display_update_priority(struct amdgpu_device *adev); -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, +					  uint64_t bo_flags);  struct drm_framebuffer *  amdgpu_display_user_framebuffer_create(struct drm_device *dev,  				       struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 489041df1f45..61f108ec2b5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -137,23 +137,23 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,  }  static int -__reservation_object_make_exclusive(struct reservation_object *obj) +__dma_resv_make_exclusive(struct dma_resv *obj)  {  	struct dma_fence **fences;  	unsigned int count;  	int r; -	if (!reservation_object_get_list(obj)) /* no shared fences to convert */ +	if (!dma_resv_get_list(obj)) /* no shared fences to convert */  		return 0; -	r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); +	r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences);  	if (r)  		return r;  	if (count == 0) {  		/* Now that was unexpected. */  	} else if (count == 1) { -		reservation_object_add_excl_fence(obj, fences[0]); +		dma_resv_add_excl_fence(obj, fences[0]);  		dma_fence_put(fences[0]);  		kfree(fences);  	} else { @@ -165,7 +165,7 @@ __reservation_object_make_exclusive(struct reservation_object *obj)  		if (!array)  			goto err_fences_put; -		reservation_object_add_excl_fence(obj, &array->base); +		dma_resv_add_excl_fence(obj, &array->base);  		dma_fence_put(&array->base);  	} @@ -216,7 +216,7 @@ static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,  		 * fences on the reservation object into a single exclusive  		 * fence.  		 */ -		r = __reservation_object_make_exclusive(bo->tbo.resv); +		r = __dma_resv_make_exclusive(bo->tbo.base.resv);  		if (r)  			goto error_unreserve;  	} @@ -268,20 +268,6 @@ error:  }  /** - * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM BO - * - * Returns: - * The BO's reservation object. 
- */ -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) -{ -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - -	return bo->tbo.resv; -} - -/**   * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation   * @dma_buf: Shared DMA buffer   * @direction: Direction of DMA transfer @@ -299,7 +285,7 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	struct ttm_operation_ctx ctx = { true, false }; -	u32 domain = amdgpu_display_supported_domains(adev); +	u32 domain = amdgpu_display_supported_domains(adev, bo->flags);  	int ret;  	bool reads = (direction == DMA_BIDIRECTIONAL ||  		      direction == DMA_FROM_DEVICE); @@ -339,14 +325,12 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {   * @gobj: GEM BO   * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.   * - * The main work is done by the &drm_gem_prime_export helper, which in turn - * uses &amdgpu_gem_prime_res_obj. + * The main work is done by the &drm_gem_prime_export helper.   *   * Returns:   * Shared DMA buffer representing the GEM BO from the given device.   */ -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, -					struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,  					int flags)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); @@ -356,9 +340,9 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,  	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)  		return ERR_PTR(-EPERM); -	buf = drm_gem_prime_export(dev, gobj, flags); +	buf = drm_gem_prime_export(gobj, flags);  	if (!IS_ERR(buf)) { -		buf->file->f_mapping = dev->anon_inode->i_mapping; +		buf->file->f_mapping = gobj->dev->anon_inode->i_mapping;  		buf->ops = &amdgpu_dmabuf_ops;  	} @@ -383,7 +367,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,  				 struct dma_buf_attachment *attach,  				 struct sg_table *sg)  { -	struct reservation_object *resv = attach->dmabuf->resv; +	struct dma_resv *resv = attach->dmabuf->resv;  	struct amdgpu_device *adev = dev->dev_private;  	struct amdgpu_bo *bo;  	struct amdgpu_bo_param bp; @@ -396,7 +380,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,  	bp.flags = 0;  	bp.type = ttm_bo_type_sg;  	bp.resv = resv; -	ww_mutex_lock(&resv->lock, NULL); +	dma_resv_lock(resv, NULL);  	ret = amdgpu_bo_create(adev, &bp, &bo);  	if (ret)  		goto error; @@ -408,11 +392,11 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,  	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)  		bo->prime_shared_count = 1; -	ww_mutex_unlock(&resv->lock); -	return &bo->gem_base; +	dma_resv_unlock(resv); +	return &bo->tbo.base;  error: -	ww_mutex_unlock(&resv->lock); +	dma_resv_unlock(resv);  	return ERR_PTR(ret);  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index c7056cbe8685..5012e6ab58f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -30,12 +30,10 @@ struct drm_gem_object *  amdgpu_gem_prime_import_sg_table(struct drm_device *dev,  				 struct dma_buf_attachment *attach,  				 struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, -					struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,  					int flags);  struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,  					    
struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);  void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);  void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);  int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 790263dcc064..3fa18003d4d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -130,13 +130,18 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT  	AMDGPU_VEGA20_DOORBELL_IH                      = 0x178,  	/* MMSCH: 392~407  	 * overlap the doorbell assignment with VCN as they are  mutually exclusive -	 * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD +	 * VCN engine's doorbell is 32 bit and two VCN ring share one QWORD  	 */ -	AMDGPU_VEGA20_DOORBELL64_VCN0_1                  = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ +	AMDGPU_VEGA20_DOORBELL64_VCN0_1                  = 0x188, /* VNC0 */  	AMDGPU_VEGA20_DOORBELL64_VCN2_3                  = 0x189,  	AMDGPU_VEGA20_DOORBELL64_VCN4_5                  = 0x18A,  	AMDGPU_VEGA20_DOORBELL64_VCN6_7                  = 0x18B, +	AMDGPU_VEGA20_DOORBELL64_VCN8_9                  = 0x18C, /* VNC1 */ +	AMDGPU_VEGA20_DOORBELL64_VCNa_b                  = 0x18D, +	AMDGPU_VEGA20_DOORBELL64_VCNc_d                  = 0x18E, +	AMDGPU_VEGA20_DOORBELL64_VCNe_f                  = 0x18F, +  	AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1             = 0x188,  	AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3             = 0x189,  	AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5             = 0x18A, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 61bd10310604..5803fcbae22f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -948,6 +948,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block  	case AMD_IP_BLOCK_TYPE_UVD:  	case AMD_IP_BLOCK_TYPE_VCN:  	case AMD_IP_BLOCK_TYPE_VCE: +	case AMD_IP_BLOCK_TYPE_SDMA:  		if (swsmu)  			ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);  		else @@ -956,7 +957,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block  		break;  	case AMD_IP_BLOCK_TYPE_GMC:  	case AMD_IP_BLOCK_TYPE_ACP: -	case AMD_IP_BLOCK_TYPE_SDMA:  		ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(  				(adev)->powerplay.pp_handle, block_type, gate));  		break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f2e8b4238efd..264677ab248a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -35,6 +35,7 @@  #include <linux/pm_runtime.h>  #include <linux/vga_switcheroo.h>  #include <drm/drm_probe_helper.h> +#include <linux/mmu_notifier.h>  #include "amdgpu.h"  #include "amdgpu_irq.h" @@ -79,9 +80,10 @@   * - 3.31.0 - Add support for per-flip tiling attribute changes with DC   * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.   * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. 
+ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches   */  #define KMS_DRIVER_MAJOR	3 -#define KMS_DRIVER_MINOR	33 +#define KMS_DRIVER_MINOR	34  #define KMS_DRIVER_PATCHLEVEL	0  #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH	256 @@ -142,13 +144,13 @@ int amdgpu_async_gfx_ring = 1;  int amdgpu_mcbp = 0;  int amdgpu_discovery = -1;  int amdgpu_mes = 0; -int amdgpu_noretry; +int amdgpu_noretry = 1;  struct amdgpu_mgpu_info mgpu_info = {  	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),  };  int amdgpu_ras_enable = -1; -uint amdgpu_ras_mask = 0xffffffff; +uint amdgpu_ras_mask = 0xfffffffb;  /**   * DOC: vramlimit (int) @@ -610,7 +612,7 @@ MODULE_PARM_DESC(mes,  module_param_named(mes, amdgpu_mes, int, 0444);  MODULE_PARM_DESC(noretry, -	"Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); +	"Disable retry faults (0 = retry enabled, 1 = retry disabled (default))");  module_param_named(noretry, amdgpu_noretry, int, 0644);  #ifdef CONFIG_HSA_AMD @@ -996,6 +998,11 @@ static const struct pci_device_id pciidlist[] = {  	/* Raven */  	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},  	{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, +	/* Arcturus */ +	{0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT},  	/* Navi10 */  	{0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},  	{0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1004,6 +1011,16 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},  	{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},  	{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, +	/* Navi14 */ +	{0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + +	/* Renoir */ +	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + +	/* Navi12 */ +	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},  	{0, 0, 0}  }; @@ -1092,21 +1109,21 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)  	 * unfortunately we can't detect certain  	 * hypervisors so just do this all the time.  	 
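/*
 * Editorial sketch, not part of the patch: the new pciidlist entries above
 * (Arcturus, Navi14, Renoir, Navi12) pack an ASIC type plus feature bits
 * such as AMD_IS_APU and AMD_EXP_HW_SUPPORT into the PCI table's
 * driver_data word, which the probe path later unpacks.  The constants and
 * bit positions below are made up for illustration; the real values live in
 * the driver's shared headers.
 */
#include <stdint.h>
#include <stdio.h>

enum chip { CHIP_ARCTURUS = 1, CHIP_RENOIR = 2, CHIP_NAVI12 = 3 };	/* illustrative */
#define AMD_IS_APU		(1ull << 60)	/* illustrative bit position */
#define AMD_EXP_HW_SUPPORT	(1ull << 61)	/* illustrative bit position */
#define CHIP_MASK		0xffffffffull

struct id_entry {
	uint16_t vendor, device;
	uint64_t driver_data;		/* asic type | feature flags */
};

static const struct id_entry ids[] = {
	{ 0x1002, 0x738C, CHIP_ARCTURUS | AMD_EXP_HW_SUPPORT },
	{ 0x1002, 0x1636, CHIP_RENOIR | AMD_IS_APU | AMD_EXP_HW_SUPPORT },
	{ 0x1002, 0x7360, CHIP_NAVI12 | AMD_EXP_HW_SUPPORT },
};

int main(void)
{
	for (size_t i = 0; i < sizeof(ids) / sizeof(ids[0]); i++) {
		uint64_t data = ids[i].driver_data;

		printf("%04x:%04x asic=%llu experimental=%d apu=%d\n",
		       ids[i].vendor, ids[i].device,
		       (unsigned long long)(data & CHIP_MASK),
		       !!(data & AMD_EXP_HW_SUPPORT), !!(data & AMD_IS_APU));
	}
	return 0;
}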
*/ +	adev->mp1_state = PP_MP1_STATE_UNLOAD;  	amdgpu_device_ip_suspend(adev); +	adev->mp1_state = PP_MP1_STATE_NONE;  }  static int amdgpu_pmops_suspend(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); +	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev);  	return amdgpu_device_suspend(drm_dev, true, true);  }  static int amdgpu_pmops_resume(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev); +	struct drm_device *drm_dev = dev_get_drvdata(dev);  	/* GPU comes up enabled by the bios on resume */  	if (amdgpu_device_is_px(drm_dev)) { @@ -1120,33 +1137,29 @@ static int amdgpu_pmops_resume(struct device *dev)  static int amdgpu_pmops_freeze(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); +	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev);  	return amdgpu_device_suspend(drm_dev, false, true);  }  static int amdgpu_pmops_thaw(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); +	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev);  	return amdgpu_device_resume(drm_dev, false, true);  }  static int amdgpu_pmops_poweroff(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); +	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev);  	return amdgpu_device_suspend(drm_dev, true, true);  }  static int amdgpu_pmops_restore(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); +	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev);  	return amdgpu_device_resume(drm_dev, false, true);  } @@ -1205,8 +1218,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  static int amdgpu_pmops_runtime_idle(struct device *dev)  { -	struct pci_dev *pdev = to_pci_dev(dev); -	struct drm_device *drm_dev = pci_get_drvdata(pdev); +	struct drm_device *drm_dev = dev_get_drvdata(dev);  	struct drm_crtc *crtc;  	if (!amdgpu_device_is_px(drm_dev)) { @@ -1373,7 +1385,7 @@ static struct drm_driver kms_driver = {  	.driver_features =  	    DRIVER_USE_AGP | DRIVER_ATOMIC |  	    DRIVER_GEM | -	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, +	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,  	.load = amdgpu_driver_load_kms,  	.open = amdgpu_driver_open_kms,  	.postclose = amdgpu_driver_postclose_kms, @@ -1397,7 +1409,6 @@ static struct drm_driver kms_driver = {  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,  	.gem_prime_export = amdgpu_gem_prime_export,  	.gem_prime_import = amdgpu_gem_prime_import, -	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,  	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,  	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,  	.gem_prime_vmap = amdgpu_gem_prime_vmap, @@ -1464,6 +1475,7 @@ static void __exit amdgpu_exit(void)  	amdgpu_unregister_atpx_handler();  	amdgpu_sync_fini();  	amdgpu_fence_slab_fini(); +	mmu_notifier_synchronize();  }  module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index eb3569b46c1e..143753d237e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -131,6 +131,10 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,  	int aligned_size, size;  	int height = mode_cmd->height;  	u32 cpp; +	u64 flags = 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | +			       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS     | +			       AMDGPU_GEM_CREATE_VRAM_CLEARED 	     | +			       AMDGPU_GEM_CREATE_CPU_GTT_USWC;  	info = drm_get_format_info(adev->ddev, mode_cmd);  	cpp = info->cpp[0]; @@ -138,15 +142,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,  	/* need to align pitch with crtc limits */  	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,  						  fb_tiled); -	domain = amdgpu_display_supported_domains(adev); - +	domain = amdgpu_display_supported_domains(adev, flags);  	height = ALIGN(mode_cmd->height, 8);  	size = mode_cmd->pitches[0] * height;  	aligned_size = ALIGN(size, PAGE_SIZE); -	ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, -				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | -				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | -				       AMDGPU_GEM_CREATE_VRAM_CLEARED, +	ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,  				       ttm_bo_type_kernel, NULL, &gobj);  	if (ret) {  		pr_err("failed to allocate framebuffer (%d)\n", aligned_size); @@ -168,7 +168,6 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,  			dev_err(adev->dev, "FB failed to set tiling flags\n");  	} -  	ret = amdgpu_bo_pin(abo, domain);  	if (ret) {  		amdgpu_bo_unreserve(abo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d79ab1da9e07..5e8bdded265f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -251,7 +251,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,  	}  	mb();  	amdgpu_asic_flush_hdp(adev, NULL); -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); +	for (i = 0; i < adev->num_vmhubs; i++) +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); +  	return 0;  } @@ -310,9 +312,9 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,  		     uint64_t flags)  {  #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS -	unsigned i,t,p; +	unsigned t,p;  #endif -	int r; +	int r, i;  	if (!adev->gart.ready) {  		WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -336,7 +338,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,  	mb();  	amdgpu_asic_flush_hdp(adev, NULL); -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); +	for (i = 0; i < adev->num_vmhubs; i++) +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index df8a23554831..f6ac1e9548f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -32,7 +32,6 @@ struct amdgpu_gds {  	uint32_t gws_size;  	uint32_t oa_size;  	uint32_t gds_compute_max_wave_id; -	uint32_t vgt_gs_max_wave_id;  };  struct amdgpu_gds_reg_offset { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 939f8305511b..8ceb44925947 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -50,7 +50,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,  			     int alignment, u32 initial_domain,  			     u64 flags, enum ttm_bo_type type, -			     struct reservation_object *resv, +			     struct dma_resv *resv,  			     struct drm_gem_object **obj)  {  	struct amdgpu_bo *bo; @@ -85,7 +85,7 @@ retry:  		}  		return r;  	} -	*obj = &bo->gem_base; +	*obj = &bo->tbo.base;  	
return 0;  } @@ -134,7 +134,7 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,  		return -EPERM;  	if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && -	    abo->tbo.resv != vm->root.base.bo->tbo.resv) +	    abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)  		return -EPERM;  	r = amdgpu_bo_reserve(abo, false); @@ -215,7 +215,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  	union drm_amdgpu_gem_create *args = data;  	uint64_t flags = args->in.domain_flags;  	uint64_t size = args->in.bo_size; -	struct reservation_object *resv = NULL; +	struct dma_resv *resv = NULL;  	struct drm_gem_object *gobj;  	uint32_t handle;  	int r; @@ -252,7 +252,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  		if (r)  			return r; -		resv = vm->root.base.bo->tbo.resv; +		resv = vm->root.base.bo->tbo.base.resv;  	}  	r = amdgpu_gem_object_create(adev, size, args->in.alignment, @@ -291,6 +291,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,  	uint32_t handle;  	int r; +	args->addr = untagged_addr(args->addr); +  	if (offset_in_page(args->addr | args->size))  		return -EINVAL; @@ -433,7 +435,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,  		return -ENOENT;  	}  	robj = gem_to_amdgpu_bo(gobj); -	ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, +	ret = dma_resv_wait_timeout_rcu(robj->tbo.base.resv, true, true,  						  timeout);  	/* ret == 0 means not signaled, @@ -689,7 +691,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,  		struct drm_amdgpu_gem_create_in info;  		void __user *out = u64_to_user_ptr(args->value); -		info.bo_size = robj->gem_base.size; +		info.bo_size = robj->tbo.base.size;  		info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;  		info.domains = robj->preferred_domains;  		info.domain_flags = robj->flags; @@ -747,7 +749,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,  	struct amdgpu_device *adev = dev->dev_private;  	struct drm_gem_object *gobj;  	uint32_t handle; -	u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; +	u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | +		    AMDGPU_GEM_CREATE_CPU_GTT_USWC;  	u32 domain;  	int r; @@ -764,7 +767,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,  	args->size = (u64)args->pitch * args->height;  	args->size = ALIGN(args->size, PAGE_SIZE);  	domain = amdgpu_bo_get_preferred_pin_domain(adev, -				amdgpu_display_supported_domains(adev)); +				amdgpu_display_supported_domains(adev, flags));  	r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags,  				     ttm_bo_type_device, NULL, &gobj);  	if (r) @@ -819,8 +822,8 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)  	if (pin_count)  		seq_printf(m, " pin count %d", pin_count); -	dma_buf = READ_ONCE(bo->gem_base.dma_buf); -	attachment = READ_ONCE(bo->gem_base.import_attach); +	dma_buf = READ_ONCE(bo->tbo.base.dma_buf); +	attachment = READ_ONCE(bo->tbo.base.import_attach);  	if (attachment)  		seq_printf(m, " imported from %p", dma_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index b8ba6e27c61f..0b66d2e6b5d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -31,7 +31,7 @@   */  #define AMDGPU_GEM_DOMAIN_MAX		0x3 -#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) +#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base)  void 
amdgpu_gem_object_free(struct drm_gem_object *obj);  int amdgpu_gem_object_open(struct drm_gem_object *obj, @@ -47,7 +47,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev);  int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,  			     int alignment, u32 initial_domain,  			     u64 flags, enum ttm_bo_type type, -			     struct reservation_object *resv, +			     struct dma_resv *resv,  			     struct drm_gem_object **obj);  int amdgpu_mode_dumb_create(struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 74066e1466f7..f9bef3154b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -389,7 +389,7 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,  				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);  	} -	if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { +	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {  		/* create MQD for each KGQ */  		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {  			ring = &adev->gfx.gfx_ring[i]; @@ -437,7 +437,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)  	struct amdgpu_ring *ring = NULL;  	int i; -	if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { +	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {  		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {  			ring = &adev->gfx.gfx_ring[i];  			kfree(adev->gfx.me.mqd_backup[i]); @@ -456,7 +456,7 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)  	}  	ring = &adev->gfx.kiq.ring; -	if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) +	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring)  		kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);  	kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);  	amdgpu_bo_free_kernel(&ring->mqd_obj, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1199b5828b90..554a59b3c4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -196,6 +196,8 @@ struct amdgpu_gfx_funcs {  				uint32_t *dst);  	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,  				 u32 queue, u32 vmid); +	int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); +	int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status);  };  struct amdgpu_ngg_buf { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 924d83e711ef..5790db61fa2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -220,6 +220,14 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)  	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);  	u64 size_af, size_bf; +	if (amdgpu_sriov_vf(adev)) { +		mc->agp_start = 0xffffffff; +		mc->agp_end = 0x0; +		mc->agp_size = 0; + +		return; +	} +  	if (mc->fb_start > mc->gart_start) {  		size_bf = (mc->fb_start & sixteen_gb_mask) -  			ALIGN(mc->gart_end + 1, sixteen_gb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 071145ac67b5..b6e1d98ef01e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -89,8 +89,8 @@ struct amdgpu_vmhub {   */  struct amdgpu_gmc_funcs {  	/* flush the vm tlb via mmio */ -	void (*flush_gpu_tlb)(struct amdgpu_device *adev, 
-			      uint32_t vmid, uint32_t flush_type); +	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, +				uint32_t vmhub, uint32_t flush_type);  	/* flush the vm tlb via ring */  	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,  				       uint64_t pd_addr); @@ -177,10 +177,11 @@ struct amdgpu_gmc {  	struct amdgpu_xgmi xgmi;  	struct amdgpu_irq_src	ecc_irq; -	struct ras_common_if    *ras_if; +	struct ras_common_if    *umc_ras_if; +	struct ras_common_if    *mmhub_ras_if;  }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))  #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7850084a05e3..60655834d649 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -143,7 +143,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,  	/* ring tests don't use a job */  	if (job) {  		vm = job->vm; -		fence_ctx = job->base.s_fence->scheduled.context; +		fence_ctx = job->base.s_fence ? +			job->base.s_fence->scheduled.context : 0;  	} else {  		vm = NULL;  		fence_ctx = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 57b3d8a9bef3..53734da1c2df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -104,7 +104,7 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,   *   * Free the pasid only after all the fences in resv are signaled.   */ -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv,  			       unsigned int pasid)  {  	struct dma_fence *fence, **fences; @@ -112,7 +112,7 @@ void amdgpu_pasid_free_delayed(struct reservation_object *resv,  	unsigned count;  	int r; -	r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences); +	r = dma_resv_get_fences_rcu(resv, NULL, &count, &fences);  	if (r)  		goto fallback; @@ -156,7 +156,7 @@ fallback:  	/* Not enough memory for the delayed delete, as last resort  	 * block for all the fences to complete.  	 */ -	reservation_object_wait_timeout_rcu(resv, true, false, +	dma_resv_wait_timeout_rcu(resv, true, false,  					    MAX_SCHEDULE_TIMEOUT);  	amdgpu_pasid_free(pasid);  } @@ -368,7 +368,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,  		 * are broken on Navi10 and Navi14.  		 */  		if (needs_flush && (adev->asic_type < CHIP_VEGA10 || -				    adev->asic_type == CHIP_NAVI10)) +				    adev->asic_type == CHIP_NAVI10 || +				    adev->asic_type == CHIP_NAVI14))  			continue;  		/* Good, we can use this VMID. 
Remember this submission as diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 7625419f0fc2..8e58325bbca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -72,7 +72,7 @@ struct amdgpu_vmid_mgr {  int amdgpu_pasid_alloc(unsigned int bits);  void amdgpu_pasid_free(unsigned int pasid); -void amdgpu_pasid_free_delayed(struct reservation_object *resv, +void amdgpu_pasid_free_delayed(struct dma_resv *resv,  			       unsigned int pasid);  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 0cf7e8606fd3..f6147528be64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -408,23 +408,38 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,  		break;  	case AMDGPU_HW_IP_VCN_DEC:  		type = AMD_IP_BLOCK_TYPE_VCN; -		if (adev->vcn.ring_dec.sched.ready) -			++num_rings; +		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +			if (adev->uvd.harvest_config & (1 << i)) +				continue; + +			if (adev->vcn.inst[i].ring_dec.sched.ready) +				++num_rings; +		}  		ib_start_alignment = 16;  		ib_size_alignment = 16;  		break;  	case AMDGPU_HW_IP_VCN_ENC:  		type = AMD_IP_BLOCK_TYPE_VCN; -		for (i = 0; i < adev->vcn.num_enc_rings; i++) -			if (adev->vcn.ring_enc[i].sched.ready) -				++num_rings; +		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +			if (adev->uvd.harvest_config & (1 << i)) +				continue; + +			for (j = 0; j < adev->vcn.num_enc_rings; j++) +				if (adev->vcn.inst[i].ring_enc[j].sched.ready) +					++num_rings; +		}  		ib_start_alignment = 64;  		ib_size_alignment = 1;  		break;  	case AMDGPU_HW_IP_VCN_JPEG:  		type = AMD_IP_BLOCK_TYPE_VCN; -		if (adev->vcn.ring_jpeg.sched.ready) -			++num_rings; +		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +			if (adev->uvd.harvest_config & (1 << i)) +				continue; + +			if (adev->vcn.inst[i].ring_jpeg.sched.ready) +				++num_rings; +		}  		ib_start_alignment = 16;  		ib_size_alignment = 16;  		break; @@ -662,6 +677,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		if (sh_num == AMDGPU_INFO_MMR_SH_INDEX_MASK)  			sh_num = 0xffffffff; +		if (info->read_mmr_reg.count > 128) +			return -EINVAL; +  		regs = kmalloc_array(info->read_mmr_reg.count, sizeof(*regs), GFP_KERNEL);  		if (!regs)  			return -ENOMEM; @@ -1088,7 +1106,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,  	amdgpu_vm_fini(adev, &fpriv->vm);  	if (pasid) -		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); +		amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);  	amdgpu_bo_unref(&pd);  	idr_for_each_entry(&fpriv->bo_list_handles, list, handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h new file mode 100644 index 000000000000..2d75ecfa199b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2019  Advanced Micro Devices, Inc. 
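/*
 * Editorial sketch, not part of the patch: with multi-instance VCN the
 * AMDGPU_HW_IP_VCN_* branches of amdgpu_hw_ip_info() above no longer look
 * at a single ring but walk every instance, skip the ones fused off in the
 * harvest mask, and add up the rings whose scheduler is ready.  The
 * stand-alone model below shows that counting pattern; the struct layout
 * and the harvest encoding (bit i set == instance i harvested) are
 * assumptions made for the illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define NUM_INST	2
#define NUM_ENC_RINGS	2

struct vcn_inst {
	bool enc_ready[NUM_ENC_RINGS];
};

static unsigned int count_enc_rings(const struct vcn_inst *inst,
				    unsigned int num_inst,
				    unsigned int harvest_config)
{
	unsigned int num_rings = 0;

	for (unsigned int i = 0; i < num_inst; i++) {
		if (harvest_config & (1u << i))
			continue;	/* instance fused off, contributes nothing */
		for (unsigned int j = 0; j < NUM_ENC_RINGS; j++)
			if (inst[i].enc_ready[j])
				num_rings++;
	}
	return num_rings;
}

int main(void)
{
	struct vcn_inst inst[NUM_INST] = {
		{ .enc_ready = { true, true } },
		{ .enc_ready = { true, false } },
	};

	printf("no harvesting:        %u\n", count_enc_rings(inst, NUM_INST, 0x0)); /* 3 */
	printf("instance 1 harvested: %u\n", count_enc_rings(inst, NUM_INST, 0x2)); /* 2 */
	return 0;
}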
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_MMHUB_H__ +#define __AMDGPU_MMHUB_H__ + +struct amdgpu_mmhub_funcs { +	void (*ras_init)(struct amdgpu_device *adev); +	void (*query_ras_error_count)(struct amdgpu_device *adev, +					void *ras_error_status); +}; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 3971c201f320..31d4deb5d294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -179,7 +179,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,  		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))  			continue; -		r = reservation_object_wait_timeout_rcu(bo->tbo.resv, +		r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,  			true, false, MAX_SCHEDULE_TIMEOUT);  		if (r <= 0)  			DRM_ERROR("(%ld) failed to wait for user bo\n", r); @@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,   * Block for operations on BOs to finish and mark pages as accessed and   * potentially dirty.   */ -static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, -			const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror, +			      const struct mmu_notifier_range *update)  {  	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);  	unsigned long start = update->start;  	unsigned long end = update->end; -	bool blockable = update->blockable; +	bool blockable = mmu_notifier_range_blockable(update);  	struct interval_tree_node *it;  	/* notification is exclusive, but interval is inclusive */ @@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,   * necessitates evicting all user-mode queues of the process. The BOs   * are restorted in amdgpu_mn_invalidate_range_end_hsa.   
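/*
 * Editorial sketch, not part of the patch: the sync_pagetables callbacks in
 * this file note that "notification is exclusive, but interval is
 * inclusive" - the mmu_notifier_range covers [start, end) while the
 * driver's interval tree stores inclusive last addresses, so the range is
 * converted to an inclusive end (end - 1) before the lookup.  The toy
 * overlap check below shows the off-by-one a raw exclusive end would
 * introduce; the structure and addresses are illustrative only.
 */
#include <stdbool.h>
#include <stdio.h>

struct node {
	unsigned long start, last;	/* inclusive interval, as in interval_tree */
};

static bool overlaps(const struct node *n, unsigned long start, unsigned long last)
{
	return n->start <= last && start <= n->last;
}

int main(void)
{
	struct node bo = { .start = 0x1000, .last = 0x1fff };

	/* Invalidated range [0x0000, 0x1000): ends exactly where the BO begins. */
	unsigned long start = 0x0000, end = 0x1000;

	printf("raw exclusive end: %d\n", overlaps(&bo, start, end));	  /* spurious hit */
	printf("inclusive end - 1: %d\n", overlaps(&bo, start, end - 1)); /* no overlap   */
	return 0;
}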
*/ -static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, -			const struct hmm_update *update) +static int +amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror, +			      const struct mmu_notifier_range *update)  {  	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);  	unsigned long start = update->start;  	unsigned long end = update->end; -	bool blockable = update->blockable; +	bool blockable = mmu_notifier_range_blockable(update);  	struct interval_tree_node *it;  	/* notification is exclusive, but interval is inclusive */ @@ -482,6 +484,5 @@ void amdgpu_hmm_init_range(struct hmm_range *range)  		range->flags = hmm_range_flags;  		range->values = hmm_range_values;  		range->pfn_shift = PAGE_SHIFT; -		INIT_LIST_HEAD(&range->list);  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index bea6f298dfdc..1fead0e8b890 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -80,14 +80,11 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)  	if (bo->pin_count > 0)  		amdgpu_bo_subtract_pin_size(bo); -	if (bo->kfd_bo) -		amdgpu_amdkfd_unreserve_memory_limit(bo); -  	amdgpu_bo_kunmap(bo); -	if (bo->gem_base.import_attach) -		drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); -	drm_gem_object_release(&bo->gem_base); +	if (bo->tbo.base.import_attach) +		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); +	drm_gem_object_release(&bo->tbo.base);  	/* in case amdgpu_device_recover_vram got NULL of bo->parent */  	if (!list_empty(&bo->shadow_list)) {  		mutex_lock(&adev->shadow_list_lock); @@ -249,8 +246,9 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,  	bp.size = size;  	bp.byte_align = align;  	bp.domain = domain; -	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | -		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; +	bp.flags = cpu_addr ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED +		: AMDGPU_GEM_CREATE_NO_CPU_ACCESS; +	bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;  	bp.type = ttm_bo_type_kernel;  	bp.resv = NULL; @@ -413,6 +411,40 @@ fail:  	return false;  } +bool amdgpu_bo_support_uswc(u64 bo_flags) +{ + +#ifdef CONFIG_X86_32 +	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit +	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 +	 */ +	return false; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) +	/* Don't try to enable write-combining when it can't work, or things +	 * may be slow +	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 +	 */ + +#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ +	 thanks to write-combining +#endif + +	if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) +		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " +			      "better performance thanks to write-combining\n"); +	return false; +#else +	/* For architectures that don't support WC memory, +	 * mask out the WC flag from the BO +	 */ +	if (!drm_arch_can_wc_memory()) +		return false; + +	return true; +#endif +} +  static int amdgpu_bo_do_create(struct amdgpu_device *adev,  			       struct amdgpu_bo_param *bp,  			       struct amdgpu_bo **bo_ptr) @@ -454,7 +486,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);  	if (bo == NULL)  		return -ENOMEM; -	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); +	drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size);  	INIT_LIST_HEAD(&bo->shadow_list);  	bo->vm_bo = NULL;  	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : @@ -466,33 +498,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  	bo->flags = bp->flags; -#ifdef CONFIG_X86_32 -	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit -	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 -	 */ -	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) -	/* Don't try to enable write-combining when it can't work, or things -	 * may be slow -	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 -	 */ - -#ifndef CONFIG_COMPILE_TEST -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ -	 thanks to write-combining -#endif - -	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) -		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " -			      "better performance thanks to write-combining\n"); -	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#else -	/* For architectures that don't support WC memory, -	 * mask out the WC flag from the BO -	 */ -	if (!drm_arch_can_wc_memory()) +	if (!amdgpu_bo_support_uswc(bo->flags))  		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#endif  	bo->tbo.bdev = &adev->mman.bdev;  	if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -521,7 +528,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {  		struct dma_fence *fence; -		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); +		r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);  		if (unlikely(r))  			goto fail_unreserve; @@ -544,7 +551,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  fail_unreserve:  	if (!bp->resv) -		ww_mutex_unlock(&bo->tbo.resv->lock); +		dma_resv_unlock(bo->tbo.base.resv);  	amdgpu_bo_unref(&bo);  	return r;  } @@ -565,7 +572,7 @@ static 
int amdgpu_bo_create_shadow(struct amdgpu_device *adev,  	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |  		AMDGPU_GEM_CREATE_SHADOW;  	bp.type = ttm_bo_type_kernel; -	bp.resv = bo->tbo.resv; +	bp.resv = bo->tbo.base.resv;  	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);  	if (!r) { @@ -606,13 +613,13 @@ int amdgpu_bo_create(struct amdgpu_device *adev,  	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {  		if (!bp->resv) -			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, +			WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,  							NULL));  		r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);  		if (!bp->resv) -			reservation_object_unlock((*bo_ptr)->tbo.resv); +			dma_resv_unlock((*bo_ptr)->tbo.base.resv);  		if (r)  			amdgpu_bo_unref(bo_ptr); @@ -709,7 +716,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)  		return 0;  	} -	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false, +	r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, false, false,  						MAX_SCHEDULE_TIMEOUT);  	if (r < 0)  		return r; @@ -1087,7 +1094,7 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)   */  void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)  { -	lockdep_assert_held(&bo->tbo.resv->lock.base); +	dma_resv_assert_held(bo->tbo.base.resv);  	if (tiling_flags)  		*tiling_flags = bo->tiling_flags; @@ -1212,6 +1219,42 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  }  /** + * amdgpu_bo_move_notify - notification about a BO being released + * @bo: pointer to a buffer object + * + * Wipes VRAM buffers whose contents should not be leaked before the + * memory is released. + */ +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) +{ +	struct dma_fence *fence = NULL; +	struct amdgpu_bo *abo; +	int r; + +	if (!amdgpu_bo_is_amdgpu_bo(bo)) +		return; + +	abo = ttm_to_amdgpu_bo(bo); + +	if (abo->kfd_bo) +		amdgpu_amdkfd_unreserve_memory_limit(abo); + +	if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || +	    !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) +		return; + +	dma_resv_lock(bo->base.resv, NULL); + +	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); +	if (!WARN_ON(r)) { +		amdgpu_bo_fence(abo, fence, false); +		dma_fence_put(fence); +	} + +	dma_resv_unlock(bo->base.resv); +} + +/**   * amdgpu_bo_fault_reserve_notify - notification about a memory fault   * @bo: pointer to a buffer object   * @@ -1283,12 +1326,12 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,  		     bool shared)  { -	struct reservation_object *resv = bo->tbo.resv; +	struct dma_resv *resv = bo->tbo.base.resv;  	if (shared) -		reservation_object_add_shared_fence(resv, fence); +		dma_resv_add_shared_fence(resv, fence);  	else -		reservation_object_add_excl_fence(resv, fence); +		dma_resv_add_excl_fence(resv, fence);  }  /** @@ -1308,7 +1351,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)  	int r;  	amdgpu_sync_create(&sync); -	amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false); +	amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false);  	r = amdgpu_sync_wait(&sync, intr);  	amdgpu_sync_free(&sync); @@ -1328,7 +1371,7 @@ int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)  {  	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); -	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && +	
WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) &&  		     !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);  	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);  	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d60593cc436e..658f4c9779b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -41,7 +41,7 @@ struct amdgpu_bo_param {  	u32				preferred_domain;  	u64				flags;  	enum ttm_bo_type		type; -	struct reservation_object	*resv; +	struct dma_resv	*resv;  };  /* bo virtual addresses in a vm */ @@ -94,7 +94,6 @@ struct amdgpu_bo {  	/* per VM structure for page tables and with virtual addresses */  	struct amdgpu_vm_bo_base	*vm_bo;  	/* Constant after initialization */ -	struct drm_gem_object		gem_base;  	struct amdgpu_bo		*parent;  	struct amdgpu_bo		*shadow; @@ -192,7 +191,7 @@ static inline unsigned amdgpu_bo_gpu_page_alignment(struct amdgpu_bo *bo)   */  static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)  { -	return drm_vma_node_offset_addr(&bo->tbo.vma_node); +	return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);  }  /** @@ -265,6 +264,7 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,  void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  			   bool evict,  			   struct ttm_mem_reg *new_mem); +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);  int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);  void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,  		     bool shared); @@ -308,5 +308,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,  					 struct seq_file *m);  #endif +bool amdgpu_bo_support_uswc(u64 bo_flags); +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 8b7efd0a7028..03930313c263 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -159,12 +159,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev,  	struct amdgpu_device *adev = ddev->dev_private;  	enum amd_pm_state_type pm; -	if (is_support_sw_smu(adev) && adev->smu.ppt_funcs->get_current_power_state) -		pm = amdgpu_smu_get_current_power_state(adev); -	else if (adev->powerplay.pp_funcs->get_current_power_state) +	if (is_support_sw_smu(adev)) { +		if (adev->smu.ppt_funcs->get_current_power_state) +			pm = amdgpu_smu_get_current_power_state(adev); +		else +			pm = adev->pm.dpm.user_state; +	} else if (adev->powerplay.pp_funcs->get_current_power_state) {  		pm = amdgpu_dpm_get_current_power_state(adev); -	else +	} else {  		pm = adev->pm.dpm.user_state; +	}  	return snprintf(buf, PAGE_SIZE, "%s\n",  			(pm == POWER_STATE_TYPE_BATTERY) ? 
"battery" : @@ -191,7 +195,11 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,  		goto fail;  	} -	if (adev->powerplay.pp_funcs->dispatch_tasks) { +	if (is_support_sw_smu(adev)) { +		mutex_lock(&adev->pm.mutex); +		adev->pm.dpm.user_state = state; +		mutex_unlock(&adev->pm.mutex); +	} else if (adev->powerplay.pp_funcs->dispatch_tasks) {  		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state);  	} else {  		mutex_lock(&adev->pm.mutex); @@ -317,13 +325,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,  	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))  		return -EINVAL; -	if (!amdgpu_sriov_vf(adev)) { -		if (is_support_sw_smu(adev)) -			current_level = smu_get_performance_level(&adev->smu); -		else if (adev->powerplay.pp_funcs->get_performance_level) -			current_level = amdgpu_dpm_get_performance_level(adev); -	} -  	if (strncmp("low", buf, strlen("low")) == 0) {  		level = AMD_DPM_FORCED_LEVEL_LOW;  	} else if (strncmp("high", buf, strlen("high")) == 0) { @@ -347,17 +348,23 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,  		goto fail;  	} -        if (amdgpu_sriov_vf(adev)) { -                if (amdgim_is_hwperf(adev) && -                    adev->virt.ops->force_dpm_level) { -                        mutex_lock(&adev->pm.mutex); -                        adev->virt.ops->force_dpm_level(adev, level); -                        mutex_unlock(&adev->pm.mutex); -                        return count; -                } else { -                        return -EINVAL; +	/* handle sriov case here */ +	if (amdgpu_sriov_vf(adev)) { +		if (amdgim_is_hwperf(adev) && +		    adev->virt.ops->force_dpm_level) { +			mutex_lock(&adev->pm.mutex); +			adev->virt.ops->force_dpm_level(adev, level); +			mutex_unlock(&adev->pm.mutex); +			return count; +		} else { +			return -EINVAL;  		} -        } +	} + +	if (is_support_sw_smu(adev)) +		current_level = smu_get_performance_level(&adev->smu); +	else if (adev->powerplay.pp_funcs->get_performance_level) +		current_level = amdgpu_dpm_get_performance_level(adev);  	if (current_level == level)  		return count; @@ -738,10 +745,10 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,  }  /** - * DOC: ppfeatures + * DOC: pp_features   *   * The amdgpu driver provides a sysfs API for adjusting what powerplay - * features to be enabled. The file ppfeatures is used for this. And + * features to be enabled. The file pp_features is used for this. And   * this is only available for Vega10 and later dGPUs.   *   * Reading back the file will show you the followings: @@ -753,7 +760,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,   * the corresponding bit from original ppfeature masks and input the   * new ppfeature masks.   
*/ -static ssize_t amdgpu_set_ppfeature_status(struct device *dev, +static ssize_t amdgpu_set_pp_feature_status(struct device *dev,  		struct device_attribute *attr,  		const char *buf,  		size_t count) @@ -770,7 +777,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,  	pr_debug("featuremask = 0x%llx\n", featuremask);  	if (is_support_sw_smu(adev)) { -		ret = smu_set_ppfeature_status(&adev->smu, featuremask); +		ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);  		if (ret)  			return -EINVAL;  	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) { @@ -782,7 +789,7 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,  	return count;  } -static ssize_t amdgpu_get_ppfeature_status(struct device *dev, +static ssize_t amdgpu_get_pp_feature_status(struct device *dev,  		struct device_attribute *attr,  		char *buf)  { @@ -790,7 +797,7 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,  	struct amdgpu_device *adev = ddev->dev_private;  	if (is_support_sw_smu(adev)) { -		return smu_get_ppfeature_status(&adev->smu, buf); +		return smu_sys_get_pp_feature_mask(&adev->smu, buf);  	} else if (adev->powerplay.pp_funcs->get_ppfeature_status)  		return amdgpu_dpm_get_ppfeature_status(adev, buf); @@ -1450,9 +1457,9 @@ static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,  static DEVICE_ATTR(mem_busy_percent, S_IRUGO,  		amdgpu_get_memory_busy_percent, NULL);  static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, -		amdgpu_get_ppfeature_status, -		amdgpu_set_ppfeature_status); +static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, +		amdgpu_get_pp_feature_status, +		amdgpu_set_pp_feature_status);  static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);  static ssize_t amdgpu_hwmon_show_temp(struct device *dev, @@ -1617,20 +1624,16 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,  	     (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))  		return -EINVAL; -	if (is_support_sw_smu(adev)) { -		err = kstrtoint(buf, 10, &value); -		if (err) -			return err; +	err = kstrtoint(buf, 10, &value); +	if (err) +		return err; +	if (is_support_sw_smu(adev)) {  		smu_set_fan_control_mode(&adev->smu, value);  	} else {  		if (!adev->powerplay.pp_funcs->set_fan_control_mode)  			return -EINVAL; -		err = kstrtoint(buf, 10, &value); -		if (err) -			return err; -  		amdgpu_dpm_set_fan_control_mode(adev, value);  	} @@ -1734,7 +1737,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,  		return -EINVAL;  	if (is_support_sw_smu(adev)) { -		err = smu_get_current_rpm(&adev->smu, &speed); +		err = smu_get_fan_speed_rpm(&adev->smu, &speed);  		if (err)  			return err;  	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { @@ -1794,7 +1797,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,  		return -EINVAL;  	if (is_support_sw_smu(adev)) { -		err = smu_get_current_rpm(&adev->smu, &rpm); +		err = smu_get_fan_speed_rpm(&adev->smu, &rpm);  		if (err)  			return err;  	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { @@ -2050,16 +2053,18 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,  		return err;  	value = value / 1000000; /* convert to Watt */ +  	if (is_support_sw_smu(adev)) { -		adev->smu.funcs->set_power_limit(&adev->smu, value); +		err = smu_set_power_limit(&adev->smu, value);  	} else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {  		err = 
adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); -		if (err) -			return err;  	} else { -		return -EINVAL; +		err = -EINVAL;  	} +	if (err) +		return err; +  	return count;  } @@ -2344,7 +2349,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,  			effective_mode &= ~S_IWUSR;  	} -	if ((adev->flags & AMD_IS_APU) && +	if (((adev->flags & AMD_IS_APU) || +	     adev->family == AMDGPU_FAMILY_SI ||	/* not implemented yet */ +	     adev->family == AMDGPU_FAMILY_KV) &&	/* not implemented yet */  	    (attr == &sensor_dev_attr_power1_average.dev_attr.attr ||  	     attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||  	     attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| @@ -2368,6 +2375,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,  			return 0;  	} +	if ((adev->family == AMDGPU_FAMILY_SI ||	/* not implemented yet */ +	     adev->family == AMDGPU_FAMILY_KV) &&	/* not implemented yet */ +	    (attr == &sensor_dev_attr_in0_input.dev_attr.attr || +	     attr == &sensor_dev_attr_in0_label.dev_attr.attr)) +		return 0; +  	/* only APUs have vddnb */  	if (!(adev->flags & AMD_IS_APU) &&  	    (attr == &sensor_dev_attr_in1_input.dev_attr.attr || @@ -2823,10 +2836,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  			DRM_ERROR("failed to create device file pp_dpm_socclk\n");  			return ret;  		} -		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); -		if (ret) { -			DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); -			return ret; +		if (adev->asic_type != CHIP_ARCTURUS) { +			ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); +			if (ret) { +				DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); +				return ret; +			}  		}  	}  	if (adev->asic_type >= CHIP_VEGA20) { @@ -2836,10 +2851,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  			return ret;  		}  	} -	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); -	if (ret) { -		DRM_ERROR("failed to create device file pp_dpm_pcie\n"); -		return ret; +	if (adev->asic_type != CHIP_ARCTURUS) { +		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); +		if (ret) { +			DRM_ERROR("failed to create device file pp_dpm_pcie\n"); +			return ret; +		}  	}  	ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);  	if (ret) { @@ -2909,10 +2926,10 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  	if ((adev->asic_type >= CHIP_VEGA10) &&  	    !(adev->flags & AMD_IS_APU)) {  		ret = device_create_file(adev->dev, -				&dev_attr_ppfeatures); +				&dev_attr_pp_features);  		if (ret) {  			DRM_ERROR("failed to create device file	" -					"ppfeatures\n"); +					"pp_features\n");  			return ret;  		}  	} @@ -2943,9 +2960,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)  	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);  	if (adev->asic_type >= CHIP_VEGA10) {  		device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); -		device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); +		if (adev->asic_type != CHIP_ARCTURUS) +			device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);  	} -	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); +	if (adev->asic_type != CHIP_ARCTURUS) +		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);  	if (adev->asic_type >= CHIP_VEGA20)  		device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);  	device_remove_file(adev->dev, &dev_attr_pp_sclk_od); @@ -2966,7 +2985,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)  		device_remove_file(adev->dev, 
&dev_attr_unique_id);  	if ((adev->asic_type >= CHIP_VEGA10) &&  	    !(adev->flags & AMD_IS_APU)) -		device_remove_file(adev->dev, &dev_attr_ppfeatures); +		device_remove_file(adev->dev, &dev_attr_pp_features);  }  void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -3067,28 +3086,44 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a  	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size))  		seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); -	/* UVD clocks */ -	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { -		if (!value) { -			seq_printf(m, "UVD: Disabled\n"); -		} else { -			seq_printf(m, "UVD: Enabled\n"); -			if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) -				seq_printf(m, "\t%u MHz (DCLK)\n", value/100); -			if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) -				seq_printf(m, "\t%u MHz (VCLK)\n", value/100); +	if (adev->asic_type > CHIP_VEGA20) { +		/* VCN clocks */ +		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { +			if (!value) { +				seq_printf(m, "VCN: Disabled\n"); +			} else { +				seq_printf(m, "VCN: Enabled\n"); +				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) +					seq_printf(m, "\t%u MHz (DCLK)\n", value/100); +				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) +					seq_printf(m, "\t%u MHz (VCLK)\n", value/100); +			}  		} -	} -	seq_printf(m, "\n"); +		seq_printf(m, "\n"); +	} else { +		/* UVD clocks */ +		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { +			if (!value) { +				seq_printf(m, "UVD: Disabled\n"); +			} else { +				seq_printf(m, "UVD: Enabled\n"); +				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) +					seq_printf(m, "\t%u MHz (DCLK)\n", value/100); +				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) +					seq_printf(m, "\t%u MHz (VCLK)\n", value/100); +			} +		} +		seq_printf(m, "\n"); -	/* VCE clocks */ -	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { -		if (!value) { -			seq_printf(m, "VCE: Disabled\n"); -		} else { -			seq_printf(m, "VCE: Enabled\n"); -			if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) -				seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); +		/* VCE clocks */ +		if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { +			if (!value) { +				seq_printf(m, "VCE: Disabled\n"); +			} else { +				seq_printf(m, "VCE: Enabled\n"); +				if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) +					seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); +			}  		}  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c027e5e7713e..4d71537a960d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -32,6 +32,7 @@  #include "psp_v3_1.h"  #include "psp_v10_0.h"  #include "psp_v11_0.h" +#include "psp_v12_0.h"  static void psp_set_funcs(struct amdgpu_device *adev); @@ -53,13 +54,19 @@ static int psp_early_init(void *handle)  		psp->autoload_supported = false;  		break;  	case CHIP_VEGA20: +	case CHIP_ARCTURUS:  		psp_v11_0_set_psp_funcs(psp);  		psp->autoload_supported = false;  		
break;  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		psp_v11_0_set_psp_funcs(psp);  		psp->autoload_supported = true;  		break; +	case CHIP_RENOIR: +		psp_v12_0_set_psp_funcs(psp); +		break;  	default:  		return -EINVAL;  	} @@ -137,8 +144,7 @@ psp_cmd_submit_buf(struct psp_context *psp,  	memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp));  	index = atomic_inc_return(&psp->fence_value); -	ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, -			     fence_mc_addr, index); +	ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index);  	if (ret) {  		atomic_dec(&psp->fence_value);  		mutex_unlock(&psp->mutex); @@ -162,8 +168,8 @@ psp_cmd_submit_buf(struct psp_context *psp,  		if (ucode)  			DRM_WARN("failed to load ucode id (%d) ",  				  ucode->ucode_id); -		DRM_WARN("psp command failed and response status is (%d)\n", -			  psp->cmd_buf_mem->resp.status); +		DRM_WARN("psp command failed and response status is (0x%X)\n", +			  psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK);  		if (!timeout) {  			mutex_unlock(&psp->mutex);  			return -EINVAL; @@ -233,6 +239,8 @@ static int psp_tmr_init(struct psp_context *psp)  {  	int ret;  	int tmr_size; +	void *tmr_buf; +	void **pptr;  	/*  	 * According to HW engineer, they prefer the TMR address be "naturally @@ -255,9 +263,10 @@ static int psp_tmr_init(struct psp_context *psp)  		}  	} +	pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;  	ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE,  				      AMDGPU_GEM_DOMAIN_VRAM, -				      &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); +				      &psp->tmr_bo, &psp->tmr_mc_addr, pptr);  	return ret;  } @@ -831,7 +840,6 @@ static int psp_hw_start(struct psp_context *psp)  				"XGMI: Failed to initialize XGMI session\n");  	} -  	if (psp->adev->psp.ta_fw) {  		ret = psp_ras_initialize(psp);  		if (ret) @@ -852,6 +860,24 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,  	case AMDGPU_UCODE_ID_SDMA1:  		*type = GFX_FW_TYPE_SDMA1;  		break; +	case AMDGPU_UCODE_ID_SDMA2: +		*type = GFX_FW_TYPE_SDMA2; +		break; +	case AMDGPU_UCODE_ID_SDMA3: +		*type = GFX_FW_TYPE_SDMA3; +		break; +	case AMDGPU_UCODE_ID_SDMA4: +		*type = GFX_FW_TYPE_SDMA4; +		break; +	case AMDGPU_UCODE_ID_SDMA5: +		*type = GFX_FW_TYPE_SDMA5; +		break; +	case AMDGPU_UCODE_ID_SDMA6: +		*type = GFX_FW_TYPE_SDMA6; +		break; +	case AMDGPU_UCODE_ID_SDMA7: +		*type = GFX_FW_TYPE_SDMA7; +		break;  	case AMDGPU_UCODE_ID_CP_CE:  		*type = GFX_FW_TYPE_CP_CE;  		break; @@ -920,6 +946,60 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,  	return 0;  } +static void psp_print_fw_hdr(struct psp_context *psp, +			     struct amdgpu_firmware_info *ucode) +{ +	struct amdgpu_device *adev = psp->adev; +	const struct sdma_firmware_header_v1_0 *sdma_hdr = +		(const struct sdma_firmware_header_v1_0 *) +		adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; +	const struct gfx_firmware_header_v1_0 *ce_hdr = +		(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; +	const struct gfx_firmware_header_v1_0 *pfp_hdr = +		(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; +	const struct gfx_firmware_header_v1_0 *me_hdr = +		(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; +	const struct gfx_firmware_header_v1_0 *mec_hdr = +		(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; +	const struct rlc_firmware_header_v2_0 *rlc_hdr = +		(const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; +	const struct 
smc_firmware_header_v1_0 *smc_hdr = +		(const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; + +	switch (ucode->ucode_id) { +	case AMDGPU_UCODE_ID_SDMA0: +	case AMDGPU_UCODE_ID_SDMA1: +	case AMDGPU_UCODE_ID_SDMA2: +	case AMDGPU_UCODE_ID_SDMA3: +	case AMDGPU_UCODE_ID_SDMA4: +	case AMDGPU_UCODE_ID_SDMA5: +	case AMDGPU_UCODE_ID_SDMA6: +	case AMDGPU_UCODE_ID_SDMA7: +		amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); +		break; +	case AMDGPU_UCODE_ID_CP_CE: +		amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); +		break; +	case AMDGPU_UCODE_ID_CP_PFP: +		amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); +		break; +	case AMDGPU_UCODE_ID_CP_ME: +		amdgpu_ucode_print_gfx_hdr(&me_hdr->header); +		break; +	case AMDGPU_UCODE_ID_CP_MEC1: +		amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); +		break; +	case AMDGPU_UCODE_ID_RLC_G: +		amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); +		break; +	case AMDGPU_UCODE_ID_SMC: +		amdgpu_ucode_print_smc_hdr(&smc_hdr->header); +		break; +	default: +		break; +	} +} +  static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,  				       struct psp_gfx_cmd_resp *cmd)  { @@ -980,17 +1060,31 @@ out:  		if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&  		    (psp_smu_reload_quirk(psp) || psp->autoload_supported))  			continue; +  		if (amdgpu_sriov_vf(adev) &&  		   (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0  		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 +		    || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7  		    || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G))  			/*skip ucode loading in SRIOV VF */  			continue; +  		if (psp->autoload_supported &&  		    (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT ||  		     ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT))  			/* skip mec JT when autoload is enabled */  			continue; +		/* Renoir only needs to load mec jump table one time */ +		if (adev->asic_type == CHIP_RENOIR && +		    ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) +			continue; + +		psp_print_fw_hdr(psp, ucode);  		ret = psp_execute_np_fw_load(psp, ucode);  		if (ret) @@ -1115,6 +1209,8 @@ static int psp_hw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct psp_context *psp = &adev->psp; +	void *tmr_buf; +	void **pptr;  	if (adev->gmc.xgmi.num_physical_nodes > 1 &&  	    psp->xgmi_context.initialized == 1) @@ -1125,7 +1221,8 @@ static int psp_hw_fini(void *handle)  	psp_ring_destroy(psp, PSP_RING_TYPE__KM); -	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); +	pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; +	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);  	amdgpu_bo_free_kernel(&psp->fw_pri_bo,  			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);  	amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1329,3 +1426,12 @@ const struct amdgpu_ip_block_version psp_v11_0_ip_block =  	.rev = 0,  	.funcs = &psp_ip_funcs,  }; + +const struct amdgpu_ip_block_version psp_v12_0_ip_block = +{ +	.type = AMD_IP_BLOCK_TYPE_PSP, +	.major = 12, +	.minor = 0, +	.rev = 0, +	.funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e0fc2a790e53..bc0947f6bc8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -90,7 +90,6 @@ struct psp_funcs  	int (*ring_destroy)(struct psp_context *psp,  			    enum psp_ring_type ring_type);  	int (*cmd_submit)(struct psp_context *psp, -			  struct amdgpu_firmware_info *ucode,  			  uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,  			  int index);  	bool (*compare_sram_data)(struct psp_context *psp, @@ -172,7 +171,6 @@ struct psp_context  	/* tmr buffer */  	struct amdgpu_bo		*tmr_bo;  	uint64_t			tmr_mc_addr; -	void				*tmr_buf;  	/* asd firmware and buffer */  	const struct firmware		*asd_fw; @@ -223,8 +221,8 @@ struct amdgpu_psp_funcs {  #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))  #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))  #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) -#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ -		(psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) +#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \ +		(psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index))  #define psp_compare_sram_data(psp, ucode, type) \  		(psp)->funcs->compare_sram_data((psp), (ucode), (type))  #define psp_init_microcode(psp) \ @@ -270,6 +268,7 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,  			uint32_t field_val, uint32_t mask, bool check_changed);  extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;  int psp_gpu_reset(struct amdgpu_device *adev);  int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1a4412e47810..016ea274b955 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -30,74 +30,6 @@  #include "amdgpu_ras.h"  #include "amdgpu_atomfirmware.h" -struct ras_ih_data { -	/* interrupt bottom half */ -	struct work_struct ih_work; -	int inuse; -	/* IP callback */ -	ras_ih_cb cb; -	/* full of entries */ -	unsigned char *ring; -	unsigned int ring_size; -	unsigned int element_size; -	unsigned int aligned_element_size; -	unsigned int rptr; -	unsigned int wptr; -}; - -struct ras_fs_data { -	char sysfs_name[32]; -	char debugfs_name[32]; -}; - -struct ras_err_data { -	unsigned long ue_count; -	unsigned long ce_count; -}; - -struct ras_err_handler_data { -	/* point to bad pages array */ -	struct { -		unsigned long bp; -		struct amdgpu_bo *bo; -	} *bps; -	/* the count of entries */ -	int count; -	/* the space can place new entries */ -	int space_left; -	/* last reserved entry's index + 1 */ -	int last_reserved; -}; - -struct ras_manager { -	struct ras_common_if head; -	/* reference count */ -	int use; -	/* ras block link */ -	struct list_head 
node; -	/* the device */ -	struct amdgpu_device *adev; -	/* debugfs */ -	struct dentry *ent; -	/* sysfs */ -	struct device_attribute sysfs_attr; -	int attr_inuse; - -	/* fs node name */ -	struct ras_fs_data fs_data; - -	/* IH data */ -	struct ras_ih_data ih_data; - -	struct ras_err_data err_data; -}; - -struct ras_badpage { -	unsigned int bp; -	unsigned int size; -	unsigned int flags; -}; -  const char *ras_error_string[] = {  	"none",  	"parity", @@ -130,17 +62,15 @@ const char *ras_block_string[] = {  #define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2  #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) +/* inject address is 52 bits */ +#define	RAS_UMC_INJECT_ADDR_LIMIT	(0x1ULL << 52) +  static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,  		uint64_t offset, uint64_t size,  		struct amdgpu_bo **bo_ptr);  static int amdgpu_ras_release_vram(struct amdgpu_device *adev,  		struct amdgpu_bo **bo_ptr); -static void amdgpu_ras_self_test(struct amdgpu_device *adev) -{ -	/* TODO */ -} -  static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,  					size_t size, loff_t *pos)  { @@ -201,6 +131,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,  	char err[9] = "ue";  	int op = -1;  	int block_id; +	uint32_t sub_block;  	u64 address, value;  	if (*pos) @@ -228,17 +159,23 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,  			return -EINVAL;  		data->head.block = block_id; -		data->head.type = memcmp("ue", err, 2) == 0 ? -			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE : -			AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; +		/* only ue and ce errors are supported */ +		if (!memcmp("ue", err, 2)) +			data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; +		else if (!memcmp("ce", err, 2)) +			data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; +		else +			return -EINVAL; +  		data->op = op;  		if (op == 2) { -			if (sscanf(str, "%*s %*s %*s %llu %llu", -						&address, &value) != 2) -				if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx", -							&address, &value) != 2) +			if (sscanf(str, "%*s %*s %*s %u %llu %llu", +						&sub_block, &address, &value) != 3) +				if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", +							&sub_block, &address, &value) != 3)  					return -EINVAL; +			data->head.sub_block_index = sub_block;  			data->inject.address = address;  			data->inject.value = value;  		} @@ -283,7 +220,7 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,   * write the struct to the control node.   *   * bash: - * echo op block [error [address value]] > .../ras/ras_ctrl + * echo op block [error [sub_blcok address value]] > .../ras/ras_ctrl   *	op: disable, enable, inject   *		disable: only block is needed   *		enable: block and error are needed @@ -293,10 +230,11 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,   *	error: ue, ce   *		ue: multi_uncorrectable   *		ce: single_correctable + *	sub_block: sub block index, pass 0 if there is no sub block   *   * here are some examples for bash commands, - *	echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl - *	echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + *	echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + *	echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl   *	echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl   *   * How to check the result? 
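
The ras_ctrl documentation in the hunk above defines a plain-text command interface ("op block [error [sub_block address value]]") on a debugfs node, with bash examples. As a rough illustration only — not part of this patch — the short C sketch below drives the same node programmatically; the debugfs path, DRI minor 0, and the command string are taken from the comment block above, and error handling is kept minimal.

	/*
	 * Hypothetical userspace sketch (not part of this patch): write a
	 * ras_ctrl command string, as documented above, from C instead of
	 * the bash echo examples.  Assumes DRI minor 0 and that debugfs is
	 * mounted at /sys/kernel/debug.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *node = "/sys/kernel/debug/dri/0/ras/ras_ctrl";
		/* inject a single correctable UMC error: sub_block 0, address 0, value 0 */
		const char *cmd = "inject umc ce 0 0 0\n";
		int fd = open(node, O_WRONLY);

		if (fd < 0) {
			perror("open ras_ctrl");
			return 1;
		}
		if (write(fd, cmd, strlen(cmd)) < 0)
			perror("write ras_ctrl");
		close(fd);
		return 0;
	}
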
@@ -315,7 +253,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *  {  	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;  	struct ras_debug_if data; -	struct amdgpu_bo *bo;  	int ret = 0;  	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -333,17 +270,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *  		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);  		break;  	case 2: -		ret = amdgpu_ras_reserve_vram(adev, -				data.inject.address, PAGE_SIZE, &bo); -		if (ret) { -			/* address was offset, now it is absolute.*/ -			data.inject.address += adev->gmc.vram_start; -			if (data.inject.address > adev->gmc.vram_end) -				break; -		} else -			data.inject.address = amdgpu_bo_gpu_offset(bo); +		if ((data.inject.address >= adev->gmc.mc_vram_size) || +		    (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { +			ret = -EINVAL; +			break; +		} + +		/* data.inject.address is offset instead of absolute gpu address */  		ret = amdgpu_ras_error_inject(adev, &data.inject); -		amdgpu_ras_release_vram(adev, &bo);  		break;  	default:  		ret = -EINVAL; @@ -661,14 +595,46 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,  		struct ras_query_if *info)  {  	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); +	struct ras_err_data err_data = {0, 0, 0, NULL};  	if (!obj)  		return -EINVAL; -	/* TODO might read the register to read the count */ + +	switch (info->head.block) { +	case AMDGPU_RAS_BLOCK__UMC: +		if (adev->umc.funcs->query_ras_error_count) +			adev->umc.funcs->query_ras_error_count(adev, &err_data); +		/* umc query_ras_error_address is also responsible for clearing +		 * error status +		 */ +		if (adev->umc.funcs->query_ras_error_address) +			adev->umc.funcs->query_ras_error_address(adev, &err_data); +		break; +	case AMDGPU_RAS_BLOCK__GFX: +		if (adev->gfx.funcs->query_ras_error_count) +			adev->gfx.funcs->query_ras_error_count(adev, &err_data); +		break; +	case AMDGPU_RAS_BLOCK__MMHUB: +		if (adev->mmhub_funcs->query_ras_error_count) +			adev->mmhub_funcs->query_ras_error_count(adev, &err_data); +		break; +	default: +		break; +	} + +	obj->err_data.ue_count += err_data.ue_count; +	obj->err_data.ce_count += err_data.ce_count;  	info->ue_count = obj->err_data.ue_count;  	info->ce_count = obj->err_data.ce_count; +	if (err_data.ce_count) +		dev_info(adev->dev, "%ld correctable errors detected in %s block\n", +			 obj->err_data.ce_count, ras_block_str(info->head.block)); +	if (err_data.ue_count) +		dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", +			 obj->err_data.ue_count, ras_block_str(info->head.block)); +  	return 0;  } @@ -689,7 +655,23 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,  	if (!obj)  		return -EINVAL; -	ret = psp_ras_trigger_error(&adev->psp, &block_info); +	switch (info->head.block) { +	case AMDGPU_RAS_BLOCK__GFX: +		if (adev->gfx.funcs->ras_error_inject) +			ret = adev->gfx.funcs->ras_error_inject(adev, info); +		else +			ret = -EINVAL; +		break; +	case AMDGPU_RAS_BLOCK__UMC: +	case AMDGPU_RAS_BLOCK__MMHUB: +		ret = psp_ras_trigger_error(&adev->psp, &block_info); +		break; +	default: +		DRM_INFO("%s error injection is not supported yet\n", +			 ras_block_str(info->head.block)); +		ret = -EINVAL; +	} +  	if (ret)  		DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",  				ras_block_str(info->head.block), @@ -706,7 +688,7 @@ int amdgpu_ras_error_cure(struct amdgpu_device *adev,  }  /* get the total error 
counts on all IPs */ -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,  		bool is_ce)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -714,7 +696,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,  	struct ras_err_data data = {0, 0};  	if (!con) -		return -EINVAL; +		return 0;  	list_for_each_entry(obj, &con->head, node) {  		struct ras_query_if info = { @@ -722,7 +704,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,  		};  		if (amdgpu_ras_error_query(adev, &info)) -			return -EINVAL; +			return 0;  		data.ce_count += info.ce_count;  		data.ue_count += info.ue_count; @@ -811,32 +793,8 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,  {  	struct amdgpu_ras *con =  		container_of(attr, struct amdgpu_ras, features_attr); -	struct drm_device *ddev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = ddev->dev_private; -	struct ras_common_if head; -	int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; -	int i; -	ssize_t s; -	struct ras_manager *obj; - -	s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); - -	for (i = 0; i < ras_block_count; i++) { -		head.block = i; - -		if (amdgpu_ras_is_feature_enabled(adev, &head)) { -			obj = amdgpu_ras_find_obj(adev, &head); -			s += scnprintf(&buf[s], PAGE_SIZE - s, -					"%s: %s\n", -					ras_block_str(i), -					ras_err_str(obj->head.type)); -		} else -			s += scnprintf(&buf[s], PAGE_SIZE - s, -					"%s: disabled\n", -					ras_block_str(i)); -	} -	return s; +	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);  }  static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) @@ -1053,6 +1011,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)  	struct ras_ih_data *data = &obj->ih_data;  	struct amdgpu_iv_entry entry;  	int ret; +	struct ras_err_data err_data = {0, 0, 0, NULL};  	while (data->rptr != data->wptr) {  		rmb(); @@ -1067,19 +1026,19 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)  		 * from the callback to udpate the error type/count, etc  		 */  		if (data->cb) { -			ret = data->cb(obj->adev, &entry); +			ret = data->cb(obj->adev, &err_data, &entry);  			/* ue will trigger an interrupt, and in that case  			 * we need do a reset to recovery the whole system.  			 * But leave IP do that recovery, here we just dispatch  			 * the error.  			 */ -			if (ret == AMDGPU_RAS_UE) { -				obj->err_data.ue_count++; +			if (ret == AMDGPU_RAS_SUCCESS) { +				/* these counts could be left as 0 if +				 * some blocks do not count error number +				 */ +				obj->err_data.ue_count += err_data.ue_count; +				obj->err_data.ce_count += err_data.ce_count;  			} -			/* Might need get ce count by register, but not all IP -			 * saves ce count, some IP just use one bit or two bits -			 * to indicate ce happened. -			 */  		}  	}  } @@ -1557,6 +1516,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)  	amdgpu_ras_check_supported(adev, &con->hw_supported,  			&con->supported); +	if (!con->hw_supported) { +		amdgpu_ras_set_context(adev, NULL); +		kfree(con); +		return 0; +	} +  	con->features = 0;  	INIT_LIST_HEAD(&con->head);  	/* Might need get this flag from vbios. 
*/ @@ -1570,7 +1535,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)  	if (amdgpu_ras_fs_init(adev))  		goto fs_out; -	amdgpu_ras_self_test(adev); +	/* ras init for each ras block */ +	if (adev->umc.funcs->ras_init) +		adev->umc.funcs->ras_init(adev);  	DRM_INFO("RAS INFO: ras initialized successfully, "  			"hardware ability[%x] ras_mask[%x]\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2841195bd3b..6c76bb2a6843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -29,6 +29,7 @@  #include "amdgpu.h"  #include "amdgpu_psp.h"  #include "ta_ras_if.h" +#include "amdgpu_ras_eeprom.h"  enum amdgpu_ras_block {  	AMDGPU_RAS_BLOCK__UMC = 0, @@ -52,6 +53,236 @@ enum amdgpu_ras_block {  #define AMDGPU_RAS_BLOCK_COUNT	AMDGPU_RAS_BLOCK__LAST  #define AMDGPU_RAS_BLOCK_MASK	((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { +	/* CPC */ +	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, +	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = +		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, +	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, +	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, +	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	/* CPF */ +	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = +		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, +	AMDGPU_RAS_BLOCK__GFX_CPF_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, +	/* CPG */ +	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = +		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPG_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, +	/* GDS */ +	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	/* SPI */ +	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, +	/* SQ */ +	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, +	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, +	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, +	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, +	/* SQC (3 ranges) */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, +	/* SQC range 0 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	/* SQC range 1 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, +	
AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	/* SQC range 2 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, +	/* TA */ +	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = +		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, +	/* TCA */ +	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = +		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	/* TCC (5 sub-ranges) */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, +	/* TCC range 0 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, +	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	/* TCC range 1 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	/* TCC range 2 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, +	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, +	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	/* TCC range 3 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	
AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	/* TCC range 4 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, +	/* TCI */ +	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, +	/* TCP */ +	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = +		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, +	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	/* TD */ +	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = +		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, +	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, +	/* EA (3 sub-ranges) */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, +	/* EA range 0 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	/* EA range 1 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	/* EA range 2 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, +	/* UTC VM L2 bank */ +	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, +	/* UTC VM walker */ +	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, +	/* UTC ATC L2 2MB cache */ +	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, +	/* UTC ATC L2 4KB cache */ +	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, +	AMDGPU_RAS_BLOCK__GFX_MAX +}; +  enum amdgpu_ras_error_type {  	AMDGPU_RAS_ERROR__NONE							= 0,  	AMDGPU_RAS_ERROR__PARITY						= 1, @@ -76,9 +307,6 @@ struct ras_common_if {  	char name[32];  }; -typedef int (*ras_ih_cb)(struct amdgpu_device *adev, -		struct amdgpu_iv_entry *entry); -  struct amdgpu_ras {  	/* ras infrastructure */  	/* for ras itself. 
*/ @@ -106,10 +334,85 @@ struct amdgpu_ras {  	struct mutex recovery_lock;  	uint32_t flags; + +	struct amdgpu_ras_eeprom_control eeprom_control;  }; -/* interfaces for IP */ +struct ras_fs_data { +	char sysfs_name[32]; +	char debugfs_name[32]; +}; + +struct ras_err_data { +	unsigned long ue_count; +	unsigned long ce_count; +	unsigned long err_addr_cnt; +	uint64_t *err_addr; +}; +struct ras_err_handler_data { +	/* point to bad pages array */ +	struct { +		unsigned long bp; +		struct amdgpu_bo *bo; +	} *bps; +	/* the count of entries */ +	int count; +	/* the space can place new entries */ +	int space_left; +	/* last reserved entry's index + 1 */ +	int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, +		struct ras_err_data *err_data, +		struct amdgpu_iv_entry *entry); + +struct ras_ih_data { +	/* interrupt bottom half */ +	struct work_struct ih_work; +	int inuse; +	/* IP callback */ +	ras_ih_cb cb; +	/* full of entries */ +	unsigned char *ring; +	unsigned int ring_size; +	unsigned int element_size; +	unsigned int aligned_element_size; +	unsigned int rptr; +	unsigned int wptr; +}; + +struct ras_manager { +	struct ras_common_if head; +	/* reference count */ +	int use; +	/* ras block link */ +	struct list_head node; +	/* the device */ +	struct amdgpu_device *adev; +	/* debugfs */ +	struct dentry *ent; +	/* sysfs */ +	struct device_attribute sysfs_attr; +	int attr_inuse; + +	/* fs node name */ +	struct ras_fs_data fs_data; + +	/* IH data */ +	struct ras_ih_data ih_data; + +	struct ras_err_data err_data; +}; + +struct ras_badpage { +	unsigned int bp; +	unsigned int size; +	unsigned int flags; +}; + +/* interfaces for IP */  struct ras_fs_if {  	struct ras_common_if head;  	char sysfs_name[32]; @@ -184,7 +487,7 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,  void amdgpu_ras_resume(struct amdgpu_device *adev);  void amdgpu_ras_suspend(struct amdgpu_device *adev); -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,  		bool is_ce);  /* error handling functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c new file mode 100644 index 000000000000..8a32b5c93778 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -0,0 +1,493 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu_ras_eeprom.h" +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include <linux/bits.h> +#include "smu_v11_0_i2c.h" + +#define EEPROM_I2C_TARGET_ADDR 0xA0 + +/* + * The 2 macros bellow represent the actual size in bytes that + * those entities occupy in the EEPROM memory. + * EEPROM_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which + * uses uint64 to store 6b fields such as retired_page. + */ +#define EEPROM_TABLE_HEADER_SIZE 20 +#define EEPROM_TABLE_RECORD_SIZE 24 + +#define EEPROM_ADDRESS_SIZE 0x2 + +/* Table hdr is 'AMDR' */ +#define EEPROM_TABLE_HDR_VAL 0x414d4452 +#define EEPROM_TABLE_VER 0x00010000 + +/* Assume 2 Mbit size */ +#define EEPROM_SIZE_BYTES 256000 +#define EEPROM_PAGE__SIZE_BYTES 256 +#define EEPROM_HDR_START 0 +#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) +#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) +#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev + +static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, +					  unsigned char *buff) +{ +	uint32_t *pp = (uint32_t *) buff; + +	pp[0] = cpu_to_le32(hdr->header); +	pp[1] = cpu_to_le32(hdr->version); +	pp[2] = cpu_to_le32(hdr->first_rec_offset); +	pp[3] = cpu_to_le32(hdr->tbl_size); +	pp[4] = cpu_to_le32(hdr->checksum); +} + +static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, +					  unsigned char *buff) +{ +	uint32_t *pp = (uint32_t *)buff; + +	hdr->header 	      = le32_to_cpu(pp[0]); +	hdr->version 	      = le32_to_cpu(pp[1]); +	hdr->first_rec_offset = le32_to_cpu(pp[2]); +	hdr->tbl_size 	      = le32_to_cpu(pp[3]); +	hdr->checksum 	      = le32_to_cpu(pp[4]); +} + +static int __update_table_header(struct amdgpu_ras_eeprom_control *control, +				 unsigned char *buff) +{ +	int ret = 0; +	struct i2c_msg msg = { +			.addr	= EEPROM_I2C_TARGET_ADDR, +			.flags	= 0, +			.len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, +			.buf	= buff, +	}; + + +	*(uint16_t *)buff = EEPROM_HDR_START; +	__encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + +	ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); +	if (ret < 1) +		DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); + +	return ret; +} + +static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) +{ +	int ret = 0; +	struct amdgpu_device *adev = to_amdgpu_device(control); +	unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; +	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; +	struct i2c_msg msg = { +			.addr	= EEPROM_I2C_TARGET_ADDR, +			.flags	= I2C_M_RD, +			.len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, +			.buf	= buff, +	}; + +	mutex_init(&control->tbl_mutex); + +	switch (adev->asic_type) { +	case CHIP_VEGA20: +		ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); +		break; + +	default: +		return 0; +	} + +	if (ret) { +		DRM_ERROR("Failed to init I2C controller, ret:%d", ret); +		return ret; +	} + +	/* Read/Create table header from EEPROM address 0 */ +	ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); +	if (ret < 1) { +		DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret); +		return ret; +	} + +	__decode_table_header_from_buff(hdr, &buff[2]); + +	if (hdr->header == 
EEPROM_TABLE_HDR_VAL) {
+		control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
+				    EEPROM_TABLE_RECORD_SIZE;
+		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
+				 control->num_recs);
+
+	} else {
+		DRM_INFO("Creating new EEPROM table");
+
+		hdr->header = EEPROM_TABLE_HDR_VAL;
+		hdr->version = EEPROM_TABLE_VER;
+		hdr->first_rec_offset = EEPROM_RECORD_START;
+		hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+		adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
+				__calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
+		ret = __update_table_header(control, buff);
+	}
+
+	/* Start inserting records from here */
+	adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
+
+	return ret == 1 ? 0 : -EIO;
+}
+
+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
+		break;
+
+	default:
+		return;
+	}
+}
+
+static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
+					  struct eeprom_table_record *record,
+					  unsigned char *buff)
+{
+	__le64 tmp = 0;
+	int i = 0;
+
+	/* Next are all record fields according to EEPROM page spec in LE format */
+	buff[i++] = record->err_type;
+
+	buff[i++] = record->bank;
+
+	tmp = cpu_to_le64(record->ts);
+	memcpy(buff + i, &tmp, 8);
+	i += 8;
+
+	tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+	memcpy(buff + i, &tmp, 6);
+	i += 6;
+
+	buff[i++] = record->mem_channel;
+	buff[i++] = record->mcumc_id;
+
+	tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
+	memcpy(buff + i, &tmp, 6);
+}
+
+static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
+					    struct eeprom_table_record *record,
+					    unsigned char *buff)
+{
+	__le64 tmp = 0;
+	int i = 0;
+
+	/* Next are all record fields according to EEPROM page spec in LE format */
+	record->err_type = buff[i++];
+
+	record->bank = buff[i++];
+
+	memcpy(&tmp, buff + i, 8);
+	record->ts = le64_to_cpu(tmp);
+	i += 8;
+
+	memcpy(&tmp, buff + i, 6);
+	record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+	i += 6;
+
+	/* decode into the record, not back into the buffer */
+	record->mem_channel = buff[i++];
+	record->mcumc_id = buff[i++];
+
+	memcpy(&tmp, buff + i, 6);
+	record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
+
+/*
+ * When reaching end of EEPROM memory jump back to 0 record address
+ * When next record access will go beyond EEPROM page boundary modify bits A17/A8
+ * in I2C selector to go to next page
+ */
+static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
+{
+	uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
+
+	/* When all EEPROM memory used jump back to 0 address */
+	if (next_address > EEPROM_SIZE_BYTES) {
+		DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
+			 "and overriding old record");
+		return EEPROM_RECORD_START;
+	}
+
+	/*
+	 * To check if we overflow page boundary, compare next address with
+	 * current and see if bits 17/8 of the EEPROM address will change.
+	 * If they do, start from the next 256b page.
+	 *
+	 * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec.
5.1.2 +	 */ +	if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) { +		DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx", +				(next_address & EEPROM_ADDR_MSB_MASK)); + +		return  (next_address & EEPROM_ADDR_MSB_MASK); +	} + +	return curr_address; +} + + +static uint32_t  __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control) +{ +	int i; +	uint32_t tbl_sum = 0; + +	/* Header checksum, skip checksum field in the calculation */ +	for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++) +		tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i); + +	return tbl_sum; +} + +static uint32_t  __calc_recs_byte_sum(struct eeprom_table_record *records, +				      int num) +{ +	int i, j; +	uint32_t tbl_sum = 0; + +	/* Records checksum */ +	for (i = 0; i < num; i++) { +		struct eeprom_table_record *record = &records[i]; + +		for (j = 0; j < sizeof(*record); j++) { +			tbl_sum += *(((unsigned char *)record) + j); +		} +	} + +	return tbl_sum; +} + +static inline uint32_t  __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control, +				  struct eeprom_table_record *records, int num) +{ +	return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num); +} + +/* Checksum = 256 -((sum of all table entries) mod 256) */ +static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control, +				  struct eeprom_table_record *records, int num, +				  uint32_t old_hdr_byte_sum) +{ +	/* +	 * This will update the table sum with new records. +	 * +	 * TODO: What happens when the EEPROM table is to be wrapped around +	 * and old records from start will get overridden. +	 */ + +	/* need to recalculate updated header byte sum */ +	control->tbl_byte_sum -= old_hdr_byte_sum; +	control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num); + +	control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256); +} + +/* table sum mod 256 + checksum must equals 256 */ +static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control, +			    struct eeprom_table_record *records, int num) +{ +	control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num); + +	if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) { +		DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum); +		return false; +	} + +	return true; +} + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, +					    struct eeprom_table_record *records, +					    bool write, +					    int num) +{ +	int i, ret = 0; +	struct i2c_msg *msgs; +	unsigned char *buffs; +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	if (adev->asic_type != CHIP_VEGA20) +		return 0; + +	buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE, +			 GFP_KERNEL); +	if (!buffs) +		return -ENOMEM; + +	mutex_lock(&control->tbl_mutex); + +	msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL); +	if (!msgs) { +		ret = -ENOMEM; +		goto free_buff; +	} + +	/* In case of overflow just start from beginning to not lose newest records */ +	if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES)) +		control->next_addr = EEPROM_RECORD_START; + + +	/* +	 * TODO Currently makes EEPROM writes for each record, this creates +	 * internal fragmentation. 
Optimize the code to do a full page write of
+	 * 256b
+	 */
+	for (i = 0; i < num; i++) {
+		unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+		struct eeprom_table_record *record = &records[i];
+		struct i2c_msg *msg = &msgs[i];
+
+		control->next_addr = __correct_eeprom_dest_address(control->next_addr);
+
+		/*
+		 * Update bits 16,17 of EEPROM address in I2C address by setting them
+		 * to bits 1,2 of Device address byte
+		 */
+		msg->addr = EEPROM_I2C_TARGET_ADDR |
+			       ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+		msg->flags	= write ? 0 : I2C_M_RD;
+		msg->len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
+		msg->buf	= buff;
+
+		/* Insert the EEPROM dest address, bits 0-15 */
+		buff[0] = ((control->next_addr >> 8) & 0xff);
+		buff[1] = (control->next_addr & 0xff);
+
+		/* EEPROM table content is stored in LE format */
+		if (write)
+			__encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+
+		/*
+		 * The destination EEPROM address might need to be corrected to account
+		 * for page or entire memory wrapping
+		 */
+		control->next_addr += EEPROM_TABLE_RECORD_SIZE;
+	}
+
+	ret = i2c_transfer(&control->eeprom_accessor, msgs, num);
+	if (ret < 1) {
+		DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
+
+		/* TODO Restore prev next EEPROM address ? */
+		goto free_msgs;
+	}
+
+	if (!write) {
+		for (i = 0; i < num; i++) {
+			unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+			struct eeprom_table_record *record = &records[i];
+
+			__decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+		}
+	}
+
+	if (write) {
+		uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
+
+		/*
+		 * Update table header with size and CRC and account for table
+		 * wrap around where the assumption is that we treat it as empty
+		 * table
+		 *
+		 * TODO - Check the assumption is correct
+		 */
+		control->num_recs += num;
+		control->num_recs %= EEPROM_MAX_RECORD_NUM;
+		control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
+		if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
+			control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
+			control->num_recs * EEPROM_TABLE_RECORD_SIZE;
+
+		__update_tbl_checksum(control, records, num, old_hdr_byte_sum);
+
+		__update_table_header(control, buffs);
+	} else if (!__validate_tbl_checksum(control, records, num)) {
+		DRM_WARN("EEPROM Table checksum mismatch!");
+		/* TODO Uncomment when EEPROM read/write is reliable */
+		/* ret = -EIO; */
+	}
+
+free_msgs:
+	kfree(msgs);
+
+free_buff:
+	kfree(buffs);
+
+	mutex_unlock(&control->tbl_mutex);
+
+	return ret == num ?
0 : -EIO; +} + +/* Used for testing if bugs encountered */ +#if 0 +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control) +{ +	int i; +	struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL); + +	if (!recs) +		return; + +	for (i = 0; i < 1 ; i++) { +		recs[i].address = 0xdeadbeef; +		recs[i].retired_page = i; +	} + +	if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) { + +		memset(recs, 0, sizeof(*recs) * 1); + +		control->next_addr = EEPROM_RECORD_START; + +		if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) { +			for (i = 0; i < 1; i++) +				DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu", +					 recs[i].address, recs[i].retired_page); +		} else +			DRM_ERROR("Failed in reading from table"); + +	} else +		DRM_ERROR("Failed in writing to table"); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h new file mode 100644 index 000000000000..41f3fcb9a29b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -0,0 +1,90 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _AMDGPU_RAS_EEPROM_H +#define _AMDGPU_RAS_EEPROM_H + +#include <linux/i2c.h> + +struct amdgpu_device; + +enum amdgpu_ras_eeprom_err_type{ +	AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER, +	AMDGPU_RAS_EEPROM_ERR_RECOVERABLE, +	AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE +}; + +struct amdgpu_ras_eeprom_table_header { +	uint32_t header; +	uint32_t version; +	uint32_t first_rec_offset; +	uint32_t tbl_size; +	uint32_t checksum; +}__attribute__((__packed__)); + +struct amdgpu_ras_eeprom_control { +	struct amdgpu_ras_eeprom_table_header tbl_hdr; +	struct i2c_adapter eeprom_accessor; +	uint32_t next_addr; +	unsigned int num_recs; +	struct mutex tbl_mutex; +	bool bus_locked; +	uint32_t tbl_byte_sum; +}; + +/* + * Represents single table record. Packed to be easily serialized into byte + * stream. 
+ */ +struct eeprom_table_record { + +	union { +		uint64_t address; +		uint64_t offset; +	}; + +	uint64_t retired_page; +	uint64_t ts; + +	enum amdgpu_ras_eeprom_err_type err_type; + +	union { +		unsigned char bank; +		unsigned char cu; +	}; + +	unsigned char mem_channel; +	unsigned char mcumc_id; +}__attribute__((__packed__)); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, +					    struct eeprom_table_record *records, +					    bool write, +					    int num); + +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); + +#endif // _AMDGPU_RAS_EEPROM_H diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4410c97ac9b7..930316e60155 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@  #include <drm/drm_print.h>  /* max number of rings */ -#define AMDGPU_MAX_RINGS		24 +#define AMDGPU_MAX_RINGS		28  #define AMDGPU_MAX_GFX_RINGS		2  #define AMDGPU_MAX_COMPUTE_RINGS	8  #define AMDGPU_MAX_VCE_RINGS		3 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 35dd152f9d5c..a9ae0d8a0589 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -25,11 +25,17 @@  #define __AMDGPU_SDMA_H__  /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES		2 +#define AMDGPU_MAX_SDMA_INSTANCES		8  enum amdgpu_sdma_irq {  	AMDGPU_SDMA_IRQ_INSTANCE0  = 0,  	AMDGPU_SDMA_IRQ_INSTANCE1, +	AMDGPU_SDMA_IRQ_INSTANCE2, +	AMDGPU_SDMA_IRQ_INSTANCE3, +	AMDGPU_SDMA_IRQ_INSTANCE4, +	AMDGPU_SDMA_IRQ_INSTANCE5, +	AMDGPU_SDMA_IRQ_INSTANCE6, +	AMDGPU_SDMA_IRQ_INSTANCE7,  	AMDGPU_SDMA_IRQ_LAST  }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 9828f3c7c655..95e5e93edd18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -190,10 +190,10 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,   */  int amdgpu_sync_resv(struct amdgpu_device *adev,  		     struct amdgpu_sync *sync, -		     struct reservation_object *resv, +		     struct dma_resv *resv,  		     void *owner, bool explicit_sync)  { -	struct reservation_object_list *flist; +	struct dma_resv_list *flist;  	struct dma_fence *f;  	void *fence_owner;  	unsigned i; @@ -203,16 +203,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,  		return -EINVAL;  	/* always sync to the exclusive fence */ -	f = reservation_object_get_excl(resv); +	f = dma_resv_get_excl(resv);  	r = amdgpu_sync_fence(adev, sync, f, false); -	flist = reservation_object_get_list(resv); +	flist = dma_resv_get_list(resv);  	if (!flist || r)  		return r;  	for (i = 0; i < flist->shared_count; ++i) {  		f = rcu_dereference_protected(flist->shared[i], -					      reservation_object_held(resv)); +					      dma_resv_held(resv));  		/* We only want to trigger KFD eviction fences on  		 * evict or move jobs. Skip KFD fences otherwise.  		 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 10cf23a57f17..b5f1778a2319 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -27,7 +27,7 @@  #include <linux/hashtable.h>  struct dma_fence; -struct reservation_object; +struct dma_resv;  struct amdgpu_device;  struct amdgpu_ring; @@ -44,7 +44,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,  		      struct dma_fence *f, bool explicit);  int amdgpu_sync_resv(struct amdgpu_device *adev,  		     struct amdgpu_sync *sync, -		     struct reservation_object *resv, +		     struct dma_resv *resv,  		     void *owner,  		     bool explicit_sync);  struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e51b48ac48eb..dff41d0a85fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -227,7 +227,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)  	if (amdgpu_ttm_tt_get_usermm(bo->ttm))  		return -EPERM; -	return drm_vma_node_verify_access(&abo->gem_base.vma_node, +	return drm_vma_node_verify_access(&abo->tbo.base.vma_node,  					  filp->private_data);  } @@ -303,7 +303,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,  			       struct amdgpu_copy_mem *src,  			       struct amdgpu_copy_mem *dst,  			       uint64_t size, -			       struct reservation_object *resv, +			       struct dma_resv *resv,  			       struct dma_fence **f)  {  	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -440,10 +440,26 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,  	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,  				       new_mem->num_pages << PAGE_SHIFT, -				       bo->resv, &fence); +				       bo->base.resv, &fence);  	if (r)  		goto error; +	/* clear the space being freed */ +	if (old_mem->mem_type == TTM_PL_VRAM && +	    (ttm_to_amdgpu_bo(bo)->flags & +	     AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { +		struct dma_fence *wipe_fence = NULL; + +		r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, +				       NULL, &wipe_fence); +		if (r) { +			goto error; +		} else if (wipe_fence) { +			dma_fence_put(fence); +			fence = wipe_fence; +		} +	} +  	/* Always block for VM page tables before committing the new location */  	if (bo->type == ttm_bo_type_kernel)  		r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); @@ -778,7 +794,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)  	struct hmm_range *range;  	unsigned long i;  	uint64_t *pfns; -	int retry = 0;  	int r = 0;  	if (!mm) /* Happens during process shutdown */ @@ -819,10 +834,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)  				0 : range->flags[HMM_PFN_WRITE];  	range->pfn_flags_mask = 0;  	range->pfns = pfns; -	hmm_range_register(range, mirror, start, -			   start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); +	range->start = start; +	range->end = start + ttm->num_pages * PAGE_SIZE; + +	hmm_range_register(range, mirror); -retry:  	/*  	 * Just wait for range to be valid, safe to ignore return value as we  	 * will use the return value of hmm_range_fault() below under the @@ -831,24 +847,12 @@ retry:  	hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);  	down_read(&mm->mmap_sem); - -	r = hmm_range_fault(range, true); -	if (unlikely(r < 0)) { -		if 
(likely(r == -EAGAIN)) { -			/* -			 * return -EAGAIN, mmap_sem is dropped -			 */ -			if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) -				goto retry; -			else -				pr_err("Retry hmm fault too many times\n"); -		} - -		goto out_up_read; -	} - +	r = hmm_range_fault(range, 0);  	up_read(&mm->mmap_sem); +	if (unlikely(r < 0)) +		goto out_free_pfns; +  	for (i = 0; i < ttm->num_pages; i++) {  		pages[i] = hmm_device_entry_to_page(range, pfns[i]);  		if (unlikely(!pages[i])) { @@ -864,9 +868,6 @@ retry:  	return 0; -out_up_read: -	if (likely(r != -EAGAIN)) -		up_read(&mm->mmap_sem);  out_free_pfns:  	hmm_range_unregister(range);  	kvfree(pfns); @@ -1470,7 +1471,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,  {  	unsigned long num_pages = bo->mem.num_pages;  	struct drm_mm_node *node = bo->mem.mm_node; -	struct reservation_object_list *flist; +	struct dma_resv_list *flist;  	struct dma_fence *f;  	int i; @@ -1478,18 +1479,18 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,  	 * cleanly handle page faults.  	 */  	if (bo->type == ttm_bo_type_kernel && -	    !reservation_object_test_signaled_rcu(bo->resv, true)) +	    !dma_resv_test_signaled_rcu(bo->base.resv, true))  		return false;  	/* If bo is a KFD BO, check if the bo belongs to the current process.  	 * If true, then return false as any KFD process needs all its BOs to  	 * be resident to run successfully  	 */ -	flist = reservation_object_get_list(bo->resv); +	flist = dma_resv_get_list(bo->base.resv);  	if (flist) {  		for (i = 0; i < flist->shared_count; ++i) {  			f = rcu_dereference_protected(flist->shared[i], -				reservation_object_held(bo->resv)); +				dma_resv_held(bo->base.resv));  			if (amdkfd_fence_check_mm(f, current->mm))  				return false;  		} @@ -1599,6 +1600,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {  	.move = &amdgpu_bo_move,  	.verify_access = &amdgpu_verify_access,  	.move_notify = &amdgpu_bo_move_notify, +	.release_notify = &amdgpu_bo_release_notify,  	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,  	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,  	.io_mem_free = &amdgpu_ttm_io_mem_free, @@ -1721,6 +1723,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	uint64_t gtt_size;  	int r;  	u64 vis_vram_limit; +	void *stolen_vga_buf;  	mutex_init(&adev->mman.gtt_window_lock); @@ -1728,7 +1731,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	r = ttm_bo_device_init(&adev->mman.bdev,  			       &amdgpu_bo_driver,  			       adev->ddev->anon_inode->i_mapping, -			       adev->need_dma32); +			       dma_addressing_limited(adev->dev));  	if (r) {  		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);  		return r; @@ -1775,7 +1778,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,  				    AMDGPU_GEM_DOMAIN_VRAM,  				    &adev->stolen_vga_memory, -				    NULL, NULL); +				    NULL, &stolen_vga_buf);  	if (r)  		return r;  	DRM_INFO("amdgpu: %uM of VRAM memory ready\n", @@ -1839,8 +1842,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)   */  void amdgpu_ttm_late_init(struct amdgpu_device *adev)  { +	void *stolen_vga_buf;  	/* return the VGA stolen memory (if any) back to VRAM */ -	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); +	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);  }  /** @@ -1992,7 +1996,7 @@ error_free:  int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,  		       uint64_t dst_offset, uint32_t 
byte_count, -		       struct reservation_object *resv, +		       struct dma_resv *resv,  		       struct dma_fence **fence, bool direct_submit,  		       bool vm_needs_flush)  { @@ -2066,7 +2070,7 @@ error_free:  int amdgpu_fill_buffer(struct amdgpu_bo *bo,  		       uint32_t src_data, -		       struct reservation_object *resv, +		       struct dma_resv *resv,  		       struct dma_fence **fence)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index caa76c693700..0dddedc06ae3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,6 +38,8 @@  #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512  #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2 +#define AMDGPU_POISON	0xd0bed0be +  struct amdgpu_mman {  	struct ttm_bo_device		bdev;  	bool				mem_global_referenced; @@ -83,18 +85,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,  int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,  		       uint64_t dst_offset, uint32_t byte_count, -		       struct reservation_object *resv, +		       struct dma_resv *resv,  		       struct dma_fence **fence, bool direct_submit,  		       bool vm_needs_flush);  int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,  			       struct amdgpu_copy_mem *src,  			       struct amdgpu_copy_mem *dst,  			       uint64_t size, -			       struct reservation_object *resv, +			       struct dma_resv *resv,  			       struct dma_fence **f);  int amdgpu_fill_buffer(struct amdgpu_bo *bo,  			uint32_t src_data, -			struct reservation_object *resv, +			struct dma_resv *resv,  			struct dma_fence **fence);  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index bfaa0eac3213..3a6115ad0196 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -83,8 +83,8 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)  		const struct smc_firmware_header_v2_0 *v2_hdr =  			container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); -		DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); -		DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); +		DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); +		DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes));  	} else {  		DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);  	} @@ -269,6 +269,16 @@ void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr)  			DRM_DEBUG("kdb_size_bytes: %u\n",  				  le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes));  		} +		if (version_minor == 2) { +			const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = +				container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); +			DRM_DEBUG("kdb_header_version: %u\n", +				  le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); +			DRM_DEBUG("kdb_offset_bytes: %u\n", +				  le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); +			DRM_DEBUG("kdb_size_bytes: %u\n", +				  le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); +		}  	} else {  		DRM_ERROR("Unknown PSP ucode version: %u.%u\n",  			  version_major, version_minor); @@ -350,11 +360,17 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)  	case CHIP_RAVEN:  	case CHIP_VEGA12:  	case CHIP_VEGA20: +	case CHIP_RENOIR:  	case 
CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		if (!load_type)  			return AMDGPU_FW_LOAD_DIRECT;  		else  			return AMDGPU_FW_LOAD_PSP; +	case CHIP_ARCTURUS: +		return AMDGPU_FW_LOAD_DIRECT; +  	default:  		DRM_ERROR("Unknown firmware load type\n");  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index c1fb6dc86440..b34f00d42049 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -90,6 +90,15 @@ struct psp_firmware_header_v1_1 {  	uint32_t kdb_size_bytes;  }; +/* version_major=1, version_minor=2 */ +struct psp_firmware_header_v1_2 { +	struct psp_firmware_header_v1_0 v1_0; +	uint32_t reserve[3]; +	uint32_t kdb_header_version; +	uint32_t kdb_offset_bytes; +	uint32_t kdb_size_bytes; +}; +  /* version_major=1, version_minor=0 */  struct ta_firmware_header_v1_0 {  	struct common_firmware_header header; @@ -262,6 +271,12 @@ union amdgpu_firmware_header {  enum AMDGPU_UCODE_ID {  	AMDGPU_UCODE_ID_SDMA0 = 0,  	AMDGPU_UCODE_ID_SDMA1, +	AMDGPU_UCODE_ID_SDMA2, +	AMDGPU_UCODE_ID_SDMA3, +	AMDGPU_UCODE_ID_SDMA4, +	AMDGPU_UCODE_ID_SDMA5, +	AMDGPU_UCODE_ID_SDMA6, +	AMDGPU_UCODE_ID_SDMA7,  	AMDGPU_UCODE_ID_CP_CE,  	AMDGPU_UCODE_ID_CP_PFP,  	AMDGPU_UCODE_ID_CP_ME, @@ -281,6 +296,7 @@ enum AMDGPU_UCODE_ID {  	AMDGPU_UCODE_ID_UVD1,  	AMDGPU_UCODE_ID_VCE,  	AMDGPU_UCODE_ID_VCN, +	AMDGPU_UCODE_ID_VCN1,  	AMDGPU_UCODE_ID_DMCU_ERAM,  	AMDGPU_UCODE_ID_DMCU_INTV,  	AMDGPU_UCODE_ID_VCN0_RAM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h new file mode 100644 index 000000000000..975afa04df09 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2019  Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __AMDGPU_UMC_H__ +#define __AMDGPU_UMC_H__ + +/* implement 64 bits REG operations via 32 bits interface */ +#define RREG64_UMC(reg)	(RREG32(reg) | \ +				((uint64_t)RREG32((reg) + 1) << 32)) +#define WREG64_UMC(reg, v)	\ +	do {	\ +		WREG32((reg), lower_32_bits(v));	\ +		WREG32((reg) + 1, upper_32_bits(v));	\ +	} while (0) + +/* + * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data, + *				uint32_t umc_reg_offset, uint32_t channel_index) + */ +#define amdgpu_umc_for_each_channel(func)	\ +	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;	\ +	uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index;	\ +	for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) {	\ +		/* enable the index mode to query eror count per channel */	\ +		adev->umc.funcs->enable_umc_index_mode(adev, umc_inst);	\ +		for (channel_inst = 0;	\ +			channel_inst < adev->umc.channel_inst_num;	\ +			channel_inst++) {	\ +			/* calc the register offset according to channel instance */	\ +			umc_reg_offset = adev->umc.channel_offs * channel_inst;	\ +			/* get channel index of interleaved memory */	\ +			channel_index = adev->umc.channel_idx_tbl[	\ +				umc_inst * adev->umc.channel_inst_num + channel_inst];	\ +			(func)(adev, err_data, umc_reg_offset, channel_index);	\ +		}	\ +	}	\ +	adev->umc.funcs->disable_umc_index_mode(adev); + +struct amdgpu_umc_funcs { +	void (*ras_init)(struct amdgpu_device *adev); +	void (*query_ras_error_count)(struct amdgpu_device *adev, +					void *ras_error_status); +	void (*query_ras_error_address)(struct amdgpu_device *adev, +					void *ras_error_status); +	void (*enable_umc_index_mode)(struct amdgpu_device *adev, +					uint32_t umc_instance); +	void (*disable_umc_index_mode)(struct amdgpu_device *adev); +}; + +struct amdgpu_umc { +	/* max error count in one ras query call */ +	uint32_t max_ras_err_cnt_per_query; +	/* number of umc channel instance with memory map register access */ +	uint32_t channel_inst_num; +	/* number of umc instance with memory map register access */ +	uint32_t umc_inst_num; +	/* UMC regiser per channel offset */ +	uint32_t channel_offs; +	/* channel index table of interleaved memory */ +	const uint32_t *channel_idx_tbl; + +	const struct amdgpu_umc_funcs *funcs; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 5b2fea3b4a2c..b2c364b8695f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1073,7 +1073,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  	ib->length_dw = 16;  	if (direct) { -		r = reservation_object_wait_timeout_rcu(bo->tbo.resv, +		r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,  							true, false,  							msecs_to_jiffies(10));  		if (r == 0) @@ -1085,7 +1085,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  		if (r)  			goto err_free;  	} else { -		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, +		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,  				     AMDGPU_FENCE_OWNER_UNDEFINED, false);  		if (r)  			goto err_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 2e12eeb314a7..7a6beb2e7c4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -46,12 +46,20 @@  #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"  #define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"  #define FIRMWARE_RAVEN2		
"amdgpu/raven2_vcn.bin" +#define FIRMWARE_ARCTURUS 	"amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR 	"amdgpu/renoir_vcn.bin"  #define FIRMWARE_NAVI10 	"amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 	"amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 	"amdgpu/navi12_vcn.bin"  MODULE_FIRMWARE(FIRMWARE_RAVEN);  MODULE_FIRMWARE(FIRMWARE_PICASSO);  MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_ARCTURUS); +MODULE_FIRMWARE(FIRMWARE_RENOIR);  MODULE_FIRMWARE(FIRMWARE_NAVI10); +MODULE_FIRMWARE(FIRMWARE_NAVI14); +MODULE_FIRMWARE(FIRMWARE_NAVI12);  static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -61,7 +69,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  	const char *fw_name;  	const struct common_firmware_header *hdr;  	unsigned char fw_check; -	int r; +	int i, r;  	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -74,12 +82,33 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  		else  			fw_name = FIRMWARE_RAVEN;  		break; +	case CHIP_ARCTURUS: +		fw_name = FIRMWARE_ARCTURUS; +		break; +	case CHIP_RENOIR: +		fw_name = FIRMWARE_RENOIR; +		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && +		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) +			adev->vcn.indirect_sram = true; +		break;  	case CHIP_NAVI10:  		fw_name = FIRMWARE_NAVI10;  		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&  		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))  			adev->vcn.indirect_sram = true;  		break; +	case CHIP_NAVI14: +		fw_name = FIRMWARE_NAVI14; +		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && +		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) +			adev->vcn.indirect_sram = true; +		break; +	case CHIP_NAVI12: +		fw_name = FIRMWARE_NAVI12; +		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && +		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) +			adev->vcn.indirect_sram = true; +		break;  	default:  		return -EINVAL;  	} @@ -133,12 +162,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;  	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)  		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); -	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, -				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, -				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); -	if (r) { -		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); -		return r; + +	for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; + +		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, +						AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo, +						&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); +		if (r) { +			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); +			return r; +		}  	}  	if (adev->vcn.indirect_sram) { @@ -156,26 +191,30 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)  { -	int i; - -	kvfree(adev->vcn.saved_bo); +	int i, j;  	if (adev->vcn.indirect_sram) {  		amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, -			      &adev->vcn.dpg_sram_gpu_addr, -			      (void **)&adev->vcn.dpg_sram_cpu_addr); +				      &adev->vcn.dpg_sram_gpu_addr, +				      (void **)&adev->vcn.dpg_sram_cpu_addr);  	} -	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, -			      &adev->vcn.gpu_addr, -			      (void **)&adev->vcn.cpu_addr); +	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		
kvfree(adev->vcn.inst[j].saved_bo); + +		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, +					  &adev->vcn.inst[j].gpu_addr, +					  (void **)&adev->vcn.inst[j].cpu_addr); -	amdgpu_ring_fini(&adev->vcn.ring_dec); +		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); -	for (i = 0; i < adev->vcn.num_enc_rings; ++i) -		amdgpu_ring_fini(&adev->vcn.ring_enc[i]); +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) +			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); -	amdgpu_ring_fini(&adev->vcn.ring_jpeg); +		amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); +	}  	release_firmware(adev->vcn.fw); @@ -186,21 +225,25 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)  {  	unsigned size;  	void *ptr; +	int i;  	cancel_delayed_work_sync(&adev->vcn.idle_work); -	if (adev->vcn.vcpu_bo == NULL) -		return 0; - -	size = amdgpu_bo_size(adev->vcn.vcpu_bo); -	ptr = adev->vcn.cpu_addr; +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		if (adev->vcn.inst[i].vcpu_bo == NULL) +			return 0; -	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); -	if (!adev->vcn.saved_bo) -		return -ENOMEM; +		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); +		ptr = adev->vcn.inst[i].cpu_addr; -	memcpy_fromio(adev->vcn.saved_bo, ptr, size); +		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); +		if (!adev->vcn.inst[i].saved_bo) +			return -ENOMEM; +		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); +	}  	return 0;  } @@ -208,32 +251,36 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)  {  	unsigned size;  	void *ptr; +	int i; -	if (adev->vcn.vcpu_bo == NULL) -		return -EINVAL; - -	size = amdgpu_bo_size(adev->vcn.vcpu_bo); -	ptr = adev->vcn.cpu_addr; - -	if (adev->vcn.saved_bo != NULL) { -		memcpy_toio(ptr, adev->vcn.saved_bo, size); -		kvfree(adev->vcn.saved_bo); -		adev->vcn.saved_bo = NULL; -	} else { -		const struct common_firmware_header *hdr; -		unsigned offset; - -		hdr = (const struct common_firmware_header *)adev->vcn.fw->data; -		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -			offset = le32_to_cpu(hdr->ucode_array_offset_bytes); -			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, -				    le32_to_cpu(hdr->ucode_size_bytes)); -			size -= le32_to_cpu(hdr->ucode_size_bytes); -			ptr += le32_to_cpu(hdr->ucode_size_bytes); +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		if (adev->vcn.inst[i].vcpu_bo == NULL) +			return -EINVAL; + +		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); +		ptr = adev->vcn.inst[i].cpu_addr; + +		if (adev->vcn.inst[i].saved_bo != NULL) { +			memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); +			kvfree(adev->vcn.inst[i].saved_bo); +			adev->vcn.inst[i].saved_bo = NULL; +		} else { +			const struct common_firmware_header *hdr; +			unsigned offset; + +			hdr = (const struct common_firmware_header *)adev->vcn.fw->data; +			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { +				offset = le32_to_cpu(hdr->ucode_array_offset_bytes); +				memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, +					    le32_to_cpu(hdr->ucode_size_bytes)); +				size -= le32_to_cpu(hdr->ucode_size_bytes); +				ptr += le32_to_cpu(hdr->ucode_size_bytes); +			} +			memset_io(ptr, 0, size);  		} -		memset_io(ptr, 0, size);  	} -  	return 0;  } @@ -241,35 +288,40 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  {  	struct amdgpu_device *adev =  		container_of(work, struct amdgpu_device, vcn.idle_work.work); -	unsigned int fences = 0; -	
unsigned int i; +	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; +	unsigned int i, j; -	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); -	} +	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); +		} -	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{ -		struct dpg_pause_state new_state; +		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{ +			struct dpg_pause_state new_state; -		if (fences) -			new_state.fw_based = VCN_DPG_STATE__PAUSE; -		else -			new_state.fw_based = VCN_DPG_STATE__UNPAUSE; +			if (fence[j]) +				new_state.fw_based = VCN_DPG_STATE__PAUSE; +			else +				new_state.fw_based = VCN_DPG_STATE__UNPAUSE; -		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) -			new_state.jpeg = VCN_DPG_STATE__PAUSE; -		else -			new_state.jpeg = VCN_DPG_STATE__UNPAUSE; +			if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) +				new_state.jpeg = VCN_DPG_STATE__PAUSE; +			else +				new_state.jpeg = VCN_DPG_STATE__UNPAUSE; -		adev->vcn.pause_dpg_mode(adev, &new_state); -	} +			adev->vcn.pause_dpg_mode(adev, &new_state); +		} -	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); -	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); +		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); +		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); +		fences += fence[j]; +	}  	if (fences == 0) {  		amdgpu_gfx_off_ctrl(adev, true); -		if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) +		if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)  			amdgpu_dpm_enable_uvd(adev, false);  		else  			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -286,7 +338,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)  	if (set_clocks) {  		amdgpu_gfx_off_ctrl(adev, false); -		if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) +		if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled)  			amdgpu_dpm_enable_uvd(adev, true);  		else  			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -299,14 +351,14 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)  		unsigned int i;  		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -			fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); +			fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);  		}  		if (fences)  			new_state.fw_based = VCN_DPG_STATE__PAUSE;  		else  			new_state.fw_based = VCN_DPG_STATE__UNPAUSE; -		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) +		if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg))  			new_state.jpeg = VCN_DPG_STATE__PAUSE;  		else  			new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -332,7 +384,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)  	unsigned i;  	int r; -	WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); +	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);  	r = amdgpu_ring_alloc(ring, 3);  	if (r)  		return r; @@ -340,7 +392,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)  	amdgpu_ring_write(ring, 0xDEADBEEF);  	amdgpu_ring_commit(ring);  	for (i = 0; i < adev->usec_timeout; i++) { -		tmp = RREG32(adev->vcn.external.scratch9); +		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);  		if (tmp == 0xDEADBEEF)  			break;  		udelay(1); @@ 
-651,7 +703,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)  	unsigned i;  	int r; -	WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); +	WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD);  	r = amdgpu_ring_alloc(ring, 3);  	if (r)  		return r; @@ -661,7 +713,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)  	amdgpu_ring_commit(ring);  	for (i = 0; i < adev->usec_timeout; i++) { -		tmp = RREG32(adev->vcn.external.jpeg_pitch); +		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);  		if (tmp == 0xDEADBEEF)  			break;  		udelay(1); @@ -735,7 +787,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)  	}  	for (i = 0; i < adev->usec_timeout; i++) { -		tmp = RREG32(adev->vcn.external.jpeg_pitch); +		tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch);  		if (tmp == 0xDEADBEEF)  			break;  		udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 99f14fcc1460..dface275c81a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -30,6 +30,12 @@  #define AMDGPU_VCN_FIRMWARE_OFFSET	256  #define AMDGPU_VCN_MAX_ENC_RINGS	3 +#define AMDGPU_MAX_VCN_INSTANCES	2 + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + +#define VCN_DEC_KMD_CMD 		0x80000000  #define VCN_DEC_CMD_FENCE		0x00000000  #define VCN_DEC_CMD_TRAP		0x00000001  #define VCN_DEC_CMD_WRITE_REG		0x00000004 @@ -145,34 +151,49 @@ struct amdgpu_vcn_reg{  	unsigned	data1;  	unsigned	cmd;  	unsigned	nop; +	unsigned	context_id; +	unsigned	ib_vmid; +	unsigned	ib_bar_low; +	unsigned	ib_bar_high; +	unsigned	ib_size; +	unsigned	gp_scratch8;  	unsigned	scratch9;  	unsigned	jpeg_pitch;  }; -struct amdgpu_vcn { +struct amdgpu_vcn_inst {  	struct amdgpu_bo	*vcpu_bo;  	void			*cpu_addr;  	uint64_t		gpu_addr; -	unsigned		fw_version;  	void			*saved_bo; -	struct delayed_work	idle_work; -	const struct firmware	*fw;	/* VCN firmware */  	struct amdgpu_ring	ring_dec;  	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];  	struct amdgpu_ring	ring_jpeg;  	struct amdgpu_irq_src	irq; +	struct amdgpu_vcn_reg	external; +}; + +struct amdgpu_vcn { +	unsigned		fw_version; +	struct delayed_work	idle_work; +	const struct firmware	*fw;	/* VCN firmware */  	unsigned		num_enc_rings;  	enum amd_powergating_state cur_state;  	struct dpg_pause_state pause_state; -	struct amdgpu_vcn_reg	internal, external; -	int (*pause_dpg_mode)(struct amdgpu_device *adev, -		struct dpg_pause_state *new_state);  	bool			indirect_sram;  	struct amdgpu_bo	*dpg_sram_bo;  	void			*dpg_sram_cpu_addr;  	uint64_t		dpg_sram_gpu_addr;  	uint32_t		*dpg_sram_curr_addr; + +	uint8_t	num_vcn_inst; +	struct amdgpu_vcn_inst	inst[AMDGPU_MAX_VCN_INSTANCES]; +	struct amdgpu_vcn_reg	internal; + +	unsigned	harvest_config; +	int (*pause_dpg_mode)(struct amdgpu_device *adev, +		struct dpg_pause_state *new_state);  };  int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 59dd204498c5..e32ae906d797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -430,48 +430,3 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)  	return clk;  } - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) -{ -	struct amdgpu_virt *virt = &adev->virt; - -	if (virt->ops && virt->ops->init_reg_access_mode) -		
virt->ops->init_reg_access_mode(adev); -} - -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) -{ -	bool ret = false; -	struct amdgpu_virt *virt = &adev->virt; - -	if (amdgpu_sriov_vf(adev) -		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) -		ret = true; - -	return ret; -} - -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) -{ -	bool ret = false; -	struct amdgpu_virt *virt = &adev->virt; - -	if (amdgpu_sriov_vf(adev) -		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) -		&& !(amdgpu_sriov_runtime(adev))) -		ret = true; - -	return ret; -} - -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) -{ -	bool ret = false; -	struct amdgpu_virt *virt = &adev->virt; - -	if (amdgpu_sriov_vf(adev) -		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) -		ret = true; - -	return ret; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index f5107731e9c4..b0b2bdc750df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -48,12 +48,6 @@ struct amdgpu_vf_error_buffer {  	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];  }; -/* According to the fw feature, some new reg access modes are supported */ -#define AMDGPU_VIRT_REG_ACCESS_LEGACY          (1 << 0) /* directly mmio */ -#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH      (1 << 1) /* by PSP */ -#define AMDGPU_VIRT_REG_ACCESS_RLC             (1 << 2) /* by RLC */ -#define AMDGPU_VIRT_REG_SKIP_SEETING           (1 << 3) /* Skip setting reg */ -  /**   * struct amdgpu_virt_ops - amdgpu device virt operations   */ @@ -65,7 +59,6 @@ struct amdgpu_virt_ops {  	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);  	int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);  	int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); -	void (*init_reg_access_mode)(struct amdgpu_device *adev);  };  /* @@ -315,10 +308,4 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);  uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); -  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 24c3c05e2fb7..e2fb141ff2e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -302,7 +302,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,  	base->next = bo->vm_bo;  	bo->vm_bo = base; -	if (bo->tbo.resv != vm->root.base.bo->tbo.resv) +	if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)  		return;  	vm->bulk_moveable = false; @@ -583,7 +583,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)  	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {  		struct amdgpu_vm *vm = bo_base->vm; -		if (abo->tbo.resv == vm->root.base.bo->tbo.resv) +		if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)  			vm->bulk_moveable = false;  	} @@ -834,7 +834,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,  		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;  
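The reservation_object to dma_resv conversion running through amdgpu_vm.c, amdgpu_ttm.c and amdgpu_sync.c in this patch is mechanical: the object moves from bo->tbo.resv to bo->tbo.base.resv and every reservation_object_* call becomes the matching dma_resv_* call. A minimal sketch of one typical call site after the rename, using only calls that already appear in these hunks (the wrapper function itself is illustrative):

/* Illustrative only: wait for all fences on a BO's reservation object. */
static long example_wait_bo_idle(struct amdgpu_bo *bo, long timeout)
{
	/* was: reservation_object_wait_timeout_rcu(bo->tbo.resv, true, true, timeout) */
	return dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
					 true,	/* wait_all: shared and exclusive fences */
					 true,	/* intr: interruptible wait */
					 timeout);
}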
	bp->type = ttm_bo_type_kernel;  	if (vm->root.base.bo) -		bp->resv = vm->root.base.bo->tbo.resv; +		bp->resv = vm->root.base.bo->tbo.base.resv;  }  /** @@ -1574,7 +1574,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  	flags &= ~AMDGPU_PTE_EXECUTABLE;  	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; -	if (adev->asic_type == CHIP_NAVI10) { +	if (adev->asic_type >= CHIP_NAVI10) {  		flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;  		flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);  	} else { @@ -1702,7 +1702,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);  			pages_addr = ttm->dma_address;  		} -		exclusive = reservation_object_get_excl(bo->tbo.resv); +		exclusive = dma_resv_get_excl(bo->tbo.base.resv);  	}  	if (bo) { @@ -1712,7 +1712,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  		flags = 0x0;  	} -	if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) +	if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv))  		last_update = &vm->last_update;  	else  		last_update = &bo_va->last_pt_update; @@ -1743,7 +1743,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  	 * the evicted list so that it gets validated again on the  	 * next command submission.  	 */ -	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { +	if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {  		uint32_t mem_type = bo->tbo.mem.mem_type;  		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) @@ -1879,18 +1879,18 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,   */  static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)  { -	struct reservation_object *resv = vm->root.base.bo->tbo.resv; +	struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;  	struct dma_fence *excl, **shared;  	unsigned i, shared_count;  	int r; -	r = reservation_object_get_fences_rcu(resv, &excl, +	r = dma_resv_get_fences_rcu(resv, &excl,  					      &shared_count, &shared);  	if (r) {  		/* Not enough memory to grab the fence list, as last resort  		 * block for all the fences to complete.  		 
*/ -		reservation_object_wait_timeout_rcu(resv, true, false, +		dma_resv_wait_timeout_rcu(resv, true, false,  						    MAX_SCHEDULE_TIMEOUT);  		return;  	} @@ -1978,7 +1978,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,  			   struct amdgpu_vm *vm)  {  	struct amdgpu_bo_va *bo_va, *tmp; -	struct reservation_object *resv; +	struct dma_resv *resv;  	bool clear;  	int r; @@ -1993,11 +1993,11 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,  	while (!list_empty(&vm->invalidated)) {  		bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,  					 base.vm_status); -		resv = bo_va->base.bo->tbo.resv; +		resv = bo_va->base.bo->tbo.base.resv;  		spin_unlock(&vm->invalidated_lock);  		/* Try to reserve the BO to avoid clearing its ptes */ -		if (!amdgpu_vm_debug && reservation_object_trylock(resv)) +		if (!amdgpu_vm_debug && dma_resv_trylock(resv))  			clear = false;  		/* Somebody else is using the BO right now */  		else @@ -2008,7 +2008,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,  			return r;  		if (!clear) -			reservation_object_unlock(resv); +			dma_resv_unlock(resv);  		spin_lock(&vm->invalidated_lock);  	}  	spin_unlock(&vm->invalidated_lock); @@ -2084,7 +2084,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,  	if (mapping->flags & AMDGPU_PTE_PRT)  		amdgpu_vm_prt_get(adev); -	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && +	if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv &&  	    !bo_va->base.moved) {  		list_move(&bo_va->base.vm_status, &vm->moved);  	} @@ -2416,7 +2416,8 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)  			struct amdgpu_bo *bo;  			bo = mapping->bo_va->base.bo; -			if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) +			if (dma_resv_locking_ctx(bo->tbo.base.resv) != +			    ticket)  				continue;  		} @@ -2443,7 +2444,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,  	struct amdgpu_vm_bo_base **base;  	if (bo) { -		if (bo->tbo.resv == vm->root.base.bo->tbo.resv) +		if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)  			vm->bulk_moveable = false;  		for (base = &bo_va->base.bo->vm_bo; *base; @@ -2507,7 +2508,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,  	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {  		struct amdgpu_vm *vm = bo_base->vm; -		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { +		if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {  			amdgpu_vm_bo_evicted(bo_base);  			continue;  		} @@ -2518,7 +2519,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,  		if (bo->tbo.type == ttm_bo_type_kernel)  			amdgpu_vm_bo_relocated(bo_base); -		else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) +		else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)  			amdgpu_vm_bo_moved(bo_base);  		else  			amdgpu_vm_bo_invalidated(bo_base); @@ -2648,7 +2649,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,   */  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)  { -	return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, +	return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,  						   true, true, timeout);  } @@ -2723,7 +2724,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,  	if (r)  		goto error_free_root; -	r = reservation_object_reserve_shared(root->tbo.resv, 1); +	r = dma_resv_reserve_shared(root->tbo.base.resv, 1);  	if (r)  		goto error_unreserve; @@ -2862,6 +2863,13 
@@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns  	WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),  		  "CPU update of VM recommended only for large BAR system\n"); +	if (vm->use_cpu_for_update) +		vm->update_funcs = &amdgpu_vm_cpu_funcs; +	else +		vm->update_funcs = &amdgpu_vm_sdma_funcs; +	dma_fence_put(vm->last_update); +	vm->last_update = NULL; +  	if (vm->pasid) {  		unsigned long flags; @@ -3060,12 +3068,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  	switch (args->in.op) {  	case AMDGPU_VM_OP_RESERVE_VMID:  		/* current, we only have requirement to reserve vmid from gfxhub */ -		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); +		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);  		if (r)  			return r;  		break;  	case AMDGPU_VM_OP_UNRESERVE_VMID: -		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); +		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);  		break;  	default:  		return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 489a162ca620..2eda3a8c330d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -90,7 +90,7 @@ struct amdgpu_bo_list_entry;                                  | AMDGPU_PTE_WRITEABLE  \                                  | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) -/* NAVI10 only */ +/* gfx10 */  #define AMDGPU_PTE_MTYPE_NV10(a)       ((uint64_t)(a) << 48)  #define AMDGPU_PTE_MTYPE_NV10_MASK     AMDGPU_PTE_MTYPE_NV10(7ULL) @@ -100,9 +100,10 @@ struct amdgpu_bo_list_entry;  #define AMDGPU_VM_FAULT_STOP_ALWAYS	2  /* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS			2 -#define AMDGPU_GFXHUB				0 -#define AMDGPU_MMHUB				1 +#define AMDGPU_MAX_VMHUBS			3 +#define AMDGPU_GFXHUB_0				0 +#define AMDGPU_MMHUB_0				1 +#define AMDGPU_MMHUB_1				2  /* hardcode that limit for now */  #define AMDGPU_VA_RESERVED_SIZE			(1ULL << 20) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index ddd181f5ed37..61fc584cbb1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -72,7 +72,7 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,  	if (r)  		return r; -	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, +	r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv,  			     owner, false);  	if (r)  		return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index d11eba09eadd..65aae75f80fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -25,7 +25,7 @@  #include "amdgpu.h"  #include "amdgpu_xgmi.h"  #include "amdgpu_smu.h" - +#include "df/df_3_6_offset.h"  static DEFINE_MUTEX(xgmi_mutex); @@ -131,9 +131,37 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,  } +#define AMDGPU_XGMI_SET_FICAA(o)	((o) | 0x456801) +static ssize_t amdgpu_xgmi_show_error(struct device *dev, +				      struct device_attribute *attr, +				      char *buf) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; +	uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; +	uint64_t fica_out; +	unsigned int error_count = 0; + +	ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); +	ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); -static 
DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); +	if (fica_out != 0x1f) +		pr_err("xGMI error counters not enabled!\n"); + +	fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + +	if ((fica_out & 0xffff) == 2) +		error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); +	adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + +	return snprintf(buf, PAGE_SIZE, "%d\n", error_count); +} + + +static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);  static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,  					 struct amdgpu_hive_info *hive) @@ -148,6 +176,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,  		return ret;  	} +	/* Create xgmi error file */ +	ret = device_create_file(adev->dev, &dev_attr_xgmi_error); +	if (ret) +		pr_err("failed to create xgmi_error\n"); + +  	/* Create sysfs link to hive info folder on the first device */  	if (adev != hive->adev) {  		ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, @@ -248,7 +282,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)  	dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); -	if (is_support_sw_smu(adev)) +	if (is_support_sw_smu_xgmi(adev))  		ret = smu_set_xgmi_pstate(&adev->smu, pstate);  	if (ret)  		dev_err(adev->dev, @@ -296,23 +330,28 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  	struct amdgpu_xgmi	*entry;  	struct amdgpu_device *tmp_adev = NULL; -	int count = 0, ret = -EINVAL; +	int count = 0, ret = 0;  	if (!adev->gmc.xgmi.supported)  		return 0; -	ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); -	if (ret) { -		dev_err(adev->dev, -			"XGMI: Failed to get node id\n"); -		return ret; -	} +	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { +		ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); +		if (ret) { +			dev_err(adev->dev, +				"XGMI: Failed to get hive id\n"); +			return ret; +		} -	ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); -	if (ret) { -		dev_err(adev->dev, -			"XGMI: Failed to get hive id\n"); -		return ret; +		ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); +		if (ret) { +			dev_err(adev->dev, +				"XGMI: Failed to get node id\n"); +			return ret; +		} +	} else { +		adev->gmc.xgmi.hive_id = 16; +		adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;  	}  	hive = amdgpu_get_xgmi_hive(adev, 1); @@ -332,29 +371,32 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)  	top_info->num_nodes = count;  	hive->number_devices = count; -	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { -		/* update node list for other device in the hive */ -		if (tmp_adev != adev) { -			top_info = &tmp_adev->psp.xgmi_context.top_info; -			top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; -			top_info->num_nodes = count; +	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { +		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { +			/* update node list for other device in the hive */ +			if (tmp_adev != adev) { +				top_info = &tmp_adev->psp.xgmi_context.top_info; +				top_info->nodes[count - 1].node_id = +					adev->gmc.xgmi.node_id; +				top_info->num_nodes = count; +			} +			ret = amdgpu_xgmi_update_topology(hive, tmp_adev); +			if (ret) +				goto exit;  		} -		ret = amdgpu_xgmi_update_topology(hive, tmp_adev); -		
if (ret) -			goto exit; -	} -	/* get latest topology info for each device from psp */ -	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { -		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, -				&tmp_adev->psp.xgmi_context.top_info); -		if (ret) { -			dev_err(tmp_adev->dev, -				"XGMI: Get topology failure on device %llx, hive %llx, ret %d", -				tmp_adev->gmc.xgmi.node_id, -				tmp_adev->gmc.xgmi.hive_id, ret); -			/* To do : continue with some node failed or disable the whole hive */ -			goto exit; +		/* get latest topology info for each device from psp */ +		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { +			ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, +					&tmp_adev->psp.xgmi_context.top_info); +			if (ret) { +				dev_err(tmp_adev->dev, +					"XGMI: Get topology failure on device %llx, hive %llx, ret %d", +					tmp_adev->gmc.xgmi.node_id, +					tmp_adev->gmc.xgmi.hive_id, ret); +				/* To do : continue with some node failed or disable the whole hive */ +				goto exit; +			}  		}  	} diff --git a/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c new file mode 100644 index 000000000000..4853899b1824 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c @@ -0,0 +1,59 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
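A note on the xgmi_error read path added above: the attribute derives its count from a single 64-bit FICA status word, where the low 16 bits act as a validity tag and bits 62 and 63 each contribute one error. A minimal standalone sketch of that decode, assuming a hypothetical helper name and a made-up sample value:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the decode in amdgpu_xgmi_show_error(): the low 16 bits of the
 * FICA status word act as a validity tag (2 == counters present) and bits
 * 62 and 63 each carry one error-count bit, so a read reports 0..2 errors. */
static unsigned int xgmi_error_count(uint64_t fica_out)
{
	unsigned int error_count = 0;

	if ((fica_out & 0xffff) == 2)
		error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);

	return error_count;
}

int main(void)
{
	/* made-up status word: tag 2 with both error bits set -> prints 2 */
	uint64_t sample = (1ULL << 63) | (1ULL << 62) | 2;

	printf("%u\n", xgmi_error_count(sample));
	return 0;
}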
+ * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "arct_ip_offset.h" + +int arct_reg_base_init(struct amdgpu_device *adev) +{ +	/* HW has more IP blocks,  only initialized the block needed by our driver  */ +	uint32_t i; +	for (i = 0 ; i < MAX_INSTANCE ; ++i) { +		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); +		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); +		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); +		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); +		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); +		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); +		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); +		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); +		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); +		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); +		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); +		adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); +		adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); +		adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); +		adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); +		adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); +		adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); +		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); +		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); +	} +	return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c new file mode 100644 index 000000000000..d9cc746af5e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
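On arct_reg_base_init() above: it only fills a lookup table, pointing each IP/instance slot at that instance's array of segment base addresses from the generated arct_ip_offset.h; register accessors later add a block-relative offset to the selected base. A minimal standalone sketch of that shape, with hypothetical structure names and made-up base values:

#include <stdint.h>
#include <stdio.h>

#define MAX_INSTANCE	8
#define MAX_SEGMENT	6

/* Rough shape of the generated *_ip_offset.h tables; the base values below
 * are made up, the real ones live in arct_ip_offset.h. */
struct ip_base_instance { uint32_t segment[MAX_SEGMENT]; };
struct ip_base { struct ip_base_instance instance[MAX_INSTANCE]; };

static const struct ip_base GC_BASE_example = {
	.instance[0] = { .segment = { 0x1260, 0xa000, 0x2402c00 } },
};

/* one slot per instance, analogous to adev->reg_offset[GC_HWIP][i] */
static const uint32_t *gc_reg_offset[MAX_INSTANCE];

static void example_reg_base_init(void)
{
	uint32_t i;

	/* same pattern as arct_reg_base_init(): point each instance slot at
	 * that instance's array of segment base addresses */
	for (i = 0; i < MAX_INSTANCE; ++i)
		gc_reg_offset[i] = GC_BASE_example.instance[i].segment;
}

/* a register's absolute dword offset is then segment base + relative offset */
static uint32_t gc_reg_addr(uint32_t instance, uint32_t seg, uint32_t reg)
{
	return gc_reg_offset[instance][seg] + reg;
}

int main(void)
{
	example_reg_base_init();
	printf("0x%x\n", gc_reg_addr(0, 0, 0x10));
	return 0;
}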
+ * + */ +#include "amdgpu.h" +#include "athub_v1_0.h" + +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, +						   bool enable) +{ +	uint32_t def, data; + +	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) +		data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; +	else +		data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + +	if (def != data) +		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, +						  bool enable) +{ +	uint32_t def, data; + +	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && +	    (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) +		data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; +	else +		data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + +	if(def != data) +		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, +			       enum amd_clockgating_state state) +{ +	if (amdgpu_sriov_vf(adev)) +		return 0; + +	switch (adev->asic_type) { +	case CHIP_VEGA10: +	case CHIP_VEGA12: +	case CHIP_VEGA20: +	case CHIP_RAVEN: +		athub_update_medium_grain_clock_gating(adev, +				state == AMD_CG_STATE_GATE ? true : false); +		athub_update_medium_grain_light_sleep(adev, +				state == AMD_CG_STATE_GATE ? true : false); +		break; +	default: +		break; +	} + +	return 0; +} + +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ +	int data; + +	if (amdgpu_sriov_vf(adev)) +		*flags = 0; + +	/* AMD_CG_SUPPORT_ATHUB_MGCG */ +	data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); +	if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) +		*flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + +	/* AMD_CG_SUPPORT_ATHUB_LS */ +	if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) +		*flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h new file mode 100644 index 000000000000..b279af59e34f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
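The new athub_v1_0.c above follows the usual def/data pattern for clock-gating updates: read the control register once, flip the enable mask according to the requested state and the cg_flags, and write back only if the value actually changed. A minimal standalone sketch of that pattern, with a hypothetical mask and stand-in register accessors:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-ins for the MMIO register and its CG enable bit */
#define CG_ENABLE_MASK	0x00020000u

static uint32_t misc_cntl;
static unsigned int writes;

static uint32_t reg_read(void) { return misc_cntl; }
static void reg_write(uint32_t v) { misc_cntl = v; writes++; }

/* Same shape as athub_update_medium_grain_clock_gating(): read once, flip
 * the mask for the requested state, and only write back on a real change. */
static void update_mgcg(bool enable)
{
	uint32_t def, data;

	def = data = reg_read();

	if (enable)
		data |= CG_ENABLE_MASK;
	else
		data &= ~CG_ENABLE_MASK;

	if (def != data)
		reg_write(data);
}

int main(void)
{
	update_mgcg(true);	/* sets the bit, one write */
	update_mgcg(true);	/* no change, no write */
	update_mgcg(false);	/* clears the bit, one write */
	printf("writes = %u\n", writes);	/* prints 2 */
	return 0;
}

The def != data check avoids redundant MMIO writes when the requested gating state is already programmed.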
+ * + */ +#ifndef __ATHUB_V1_0_H__ +#define __ATHUB_V1_0_H__ + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, +			       enum amd_clockgating_state state); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 89b32b6b81c8..ceb9aa4df0e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -74,6 +74,8 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		athub_v2_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		athub_v2_0_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 1ffbc0d3d7a1..b81bb414fcb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1291,6 +1291,12 @@ static int cik_asic_reset(struct amdgpu_device *adev)  	return r;  } +static enum amd_reset_method +cik_asic_reset_method(struct amdgpu_device *adev) +{ +	return AMD_RESET_METHOD_LEGACY; +} +  static u32 cik_get_config_memsize(struct amdgpu_device *adev)  {  	return RREG32(mmCONFIG_MEMSIZE); @@ -1823,6 +1829,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =  	.read_bios_from_rom = &cik_read_bios_from_rom,  	.read_register = &cik_read_register,  	.reset = &cik_asic_reset, +	.reset_method = &cik_asic_reset_method,  	.set_vga_state = &cik_vga_set_state,  	.get_xclk = &cik_get_xclk,  	.set_uvd_clocks = &cik_set_uvd_clocks, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 1ffd1963e765..645550e7caf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -236,6 +236,7 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev,  				int crtc_id, u64 crtc_base, bool async)  {  	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; +	struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;  	u32 tmp;  	/* flip at hsync for async, default is vsync */ @@ -243,6 +244,9 @@ static void dce_v10_0_page_flip(struct amdgpu_device *adev,  	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,  			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);  	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); +	/* update pitch */ +	WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, +	       fb->pitches[0] / fb->format->cpp[0]);  	/* update the primary scanout address */  	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,  	       upper_32_bits(crtc_base)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 9e0782b54066..d9f470632b2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -254,6 +254,7 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,  				int crtc_id, u64 crtc_base, bool async)  {  	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; +	struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;  	u32 tmp;  	/* flip immediate for async, default is vsync */ @@ -261,6 +262,9 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,  	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,  			    GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 
1 : 0);  	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); +	/* update pitch */ +	WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, +	       fb->pitches[0] / fb->format->cpp[0]);  	/* update the scanout addresses */  	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,  	       upper_32_bits(crtc_base)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 4bf453e07dca..3eb2e7429269 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -191,10 +191,14 @@ static void dce_v6_0_page_flip(struct amdgpu_device *adev,  			       int crtc_id, u64 crtc_base, bool async)  {  	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; +	struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;  	/* flip at hsync for async, default is vsync */  	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?  	       GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); +	/* update pitch */ +	WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, +	       fb->pitches[0] / fb->format->cpp[0]);  	/* update the scanout addresses */  	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,  	       upper_32_bits(crtc_base)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index b23418ca8f6a..a16c5e9e610e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -184,10 +184,14 @@ static void dce_v8_0_page_flip(struct amdgpu_device *adev,  			       int crtc_id, u64 crtc_base, bool async)  {  	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; +	struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb;  	/* flip at hsync for async, default is vsync */  	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ?  	       
GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); +	/* update pitch */ +	WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, +	       fb->pitches[0] / fb->format->cpp[0]);  	/* update the primary scanout addresses */  	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,  	       upper_32_bits(crtc_base)); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 3cc0a16649f9..c9608ae8643b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -454,13 +454,8 @@ static int dce_virtual_hw_init(void *handle)  #endif  		/* no DCE */  		break; -	case CHIP_VEGA10: -	case CHIP_VEGA12: -	case CHIP_VEGA20: -	case CHIP_NAVI10: -		break;  	default: -		DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); +		break;  	}  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index ef6e91f9f51c..5850c8e34caa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -93,6 +93,96 @@ const struct attribute_group *df_v3_6_attr_groups[] = {  		NULL  }; +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, +				 uint32_t ficaa_val) +{ +	unsigned long flags, address, data; +	uint32_t ficadl_val, ficadh_val; + +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); +	WREG32(data, ficaa_val); + +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); +	ficadl_val = RREG32(data); + +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); +	ficadh_val = RREG32(data); + +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + +	return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); +} + +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, +			     uint32_t ficadl_val, uint32_t ficadh_val) +{ +	unsigned long flags, address, data; + +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); +	WREG32(data, ficaa_val); + +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); +	WREG32(data, ficadl_val); + +	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); +	WREG32(data, ficadh_val); + +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_rreg - read perfmon lo and hi + * + * required to be atomic.  no mmio method provided so subsequent reads for lo + * and hi require to preserve df finite state machine + */ +static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, +			    uint32_t lo_addr, uint32_t *lo_val, +			    uint32_t hi_addr, uint32_t *hi_val) +{ +	unsigned long flags, address, data; + +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, lo_addr); +	*lo_val = RREG32(data); +	WREG32(address, hi_addr); +	*hi_val = RREG32(data); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_wreg - write to perfmon lo and hi + * + * required to be atomic.  
no mmio method provided so subsequent reads after + * data writes cannot occur to preserve data fabrics finite state machine. + */ +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, +			    uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) +{ +	unsigned long flags, address, data; + +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, lo_addr); +	WREG32(data, lo_val); +	WREG32(address, hi_addr); +	WREG32(data, hi_val); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} +  /* get the number of df counters available */  static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,  		struct device_attribute *attr, @@ -268,6 +358,10 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  					  uint32_t *lo_val,  					  uint32_t *hi_val)  { + +	uint32_t eventsel, instance, unitmask; +	uint32_t instance_10, instance_5432, instance_76; +  	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);  	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { @@ -276,40 +370,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  		return -ENXIO;  	} -	if (lo_val && hi_val) { -		uint32_t eventsel, instance, unitmask; -		uint32_t instance_10, instance_5432, instance_76; +	eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; +	unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; +	instance = DF_V3_6_GET_INSTANCE(config); -		eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; -		unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; -		instance = DF_V3_6_GET_INSTANCE(config); +	instance_10 = instance & 0x3; +	instance_5432 = (instance >> 2) & 0xf; +	instance_76 = (instance >> 6) & 0x3; -		instance_10 = instance & 0x3; -		instance_5432 = (instance >> 2) & 0xf; -		instance_76 = (instance >> 6) & 0x3; +	*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); +	*hi_val = (instance_76 << 29) | instance_5432; -		*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; -		*hi_val = (instance_76 << 29) | instance_5432; -	} +	DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", +		config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);  	return 0;  } -/* assign df performance counters for read */ -static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, -				   uint64_t config, -				   int *is_assigned) +/* add df performance counters for read */ +static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, +				   uint64_t config)  {  	int i, target_cntr; -	*is_assigned = 0; -  	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); -	if (target_cntr >= 0) { -		*is_assigned = 1; +	if (target_cntr >= 0)  		return 0; -	}  	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {  		if (adev->df_perfmon_config_assign_mask[i] == 0U) { @@ -344,45 +431,13 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,  	if ((lo_base_addr == 0) || (hi_base_addr == 0))  		return; -	WREG32_PCIE(lo_base_addr, 0UL); -	WREG32_PCIE(hi_base_addr, 0UL); -} - - -static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, -				      uint64_t config) -{ -	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; -	int ret, is_assigned; - -	ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); - -	if (ret || is_assigned) -		return ret; - -	ret = df_v3_6_pmc_get_ctrl_settings(adev, -			config, -			&lo_base_addr, -			&hi_base_addr, -			&lo_val, -			&hi_val); - -	if (ret) -		return ret; - -	DRM_DEBUG_DRIVER("config=%llx 
addr=%08x:%08x val=%08x:%08x", -			config, lo_base_addr, hi_base_addr, lo_val, hi_val); - -	WREG32_PCIE(lo_base_addr, lo_val); -	WREG32_PCIE(hi_base_addr, hi_val); - -	return ret; +	df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);  }  static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,  			     int is_enable)  { -	uint32_t lo_base_addr, hi_base_addr, lo_val; +	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;  	int ret = 0;  	switch (adev->asic_type) { @@ -391,24 +446,20 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,  		df_v3_6_reset_perfmon_cntr(adev, config);  		if (is_enable) { -			ret = df_v3_6_add_perfmon_cntr(adev, config); +			ret = df_v3_6_pmc_add_cntr(adev, config);  		} else {  			ret = df_v3_6_pmc_get_ctrl_settings(adev,  					config,  					&lo_base_addr,  					&hi_base_addr, -					NULL, -					NULL); +					&lo_val, +					&hi_val);  			if (ret)  				return ret; -			lo_val = RREG32_PCIE(lo_base_addr); - -			DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", -				config, lo_base_addr, hi_base_addr, lo_val); - -			WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); +			df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, +					hi_base_addr, hi_val);  		}  		break; @@ -422,7 +473,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,  static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,  			    int is_disable)  { -	uint32_t lo_base_addr, hi_base_addr, lo_val; +	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;  	int ret = 0;  	switch (adev->asic_type) { @@ -431,18 +482,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,  			config,  			&lo_base_addr,  			&hi_base_addr, -			NULL, -			NULL); +			&lo_val, +			&hi_val);  		if (ret)  			return ret; -		lo_val = RREG32_PCIE(lo_base_addr); - -		DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", -				config, lo_base_addr, hi_base_addr, lo_val); - -		WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); +		df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);  		if (is_disable)  			df_v3_6_pmc_release_cntr(adev, config); @@ -471,8 +517,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,  		if ((lo_base_addr == 0) || (hi_base_addr == 0))  			return; -		lo_val = RREG32_PCIE(lo_base_addr); -		hi_val = RREG32_PCIE(hi_base_addr); +		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, +				hi_base_addr, &hi_val);  		*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); @@ -480,7 +526,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,  			*count = 0;  		DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", -			config, lo_base_addr, hi_base_addr, lo_val, hi_val); +			 config, lo_base_addr, hi_base_addr, lo_val, hi_val);  		break; @@ -499,5 +545,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {  	.get_clockgating_state = df_v3_6_get_clockgating_state,  	.pmc_start = df_v3_6_pmc_start,  	.pmc_stop = df_v3_6_pmc_stop, -	.pmc_get_count = df_v3_6_pmc_get_count +	.pmc_get_count = df_v3_6_pmc_get_count, +	.get_fica = df_v3_6_get_fica, +	.set_fica = df_v3_6_set_fica  }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1675d5837c3c..638c821611ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -20,8 +20,12 @@   * OTHER DEALINGS IN THE SOFTWARE.   
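Regarding the df_v3_6 perfmon rework above: pmc_get_count() now reads the lo and hi halves through df_v3_6_perfmon_rreg() under one lock and widens them into a single 64-bit count. A minimal standalone sketch of that assembly, using made-up register contents:

#include <stdint.h>
#include <stdio.h>

/* Combine the halves the way df_v3_6_pmc_get_count() does: widen before
 * shifting so the high word is not lost to 32-bit arithmetic. */
static uint64_t perfmon_count(uint32_t lo_val, uint32_t hi_val)
{
	return ((uint64_t)hi_val << 32) | lo_val;
}

int main(void)
{
	/* made-up lo/hi register contents */
	uint32_t lo = 0xdeadbeef, hi = 0x12;

	printf("count = 0x%llx\n",
	       (unsigned long long)perfmon_count(lo, hi));	/* 0x12deadbeef */
	return 0;
}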
*   */ + +#include <linux/delay.h> +#include <linux/kernel.h>  #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h>  #include "amdgpu.h"  #include "amdgpu_gfx.h"  #include "amdgpu_psp.h" @@ -56,6 +60,9 @@  #define F32_CE_PROGRAM_RAM_SIZE		65536  #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L +#define mmCGTT_GS_NGG_CLK_CTRL	0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX	1 +  MODULE_FIRMWARE("amdgpu/navi10_ce.bin");  MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");  MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -63,6 +70,25 @@ MODULE_FIRMWARE("amdgpu/navi10_mec.bin");  MODULE_FIRMWARE("amdgpu/navi10_mec2.bin");  MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2_wks.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi12_me.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); +  static const struct soc15_reg_golden golden_settings_gc_10_1[] =  {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -109,6 +135,99 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] =  	/* Pending on emulation bring up */  }; +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 
0x10000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmRMI_SPARE, 0xffffffff, 0xffff3101), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = +{ +	/* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = +{ +	/* Pending on emulation bring up */ +}; +  #define DEFAULT_SH_MEM_CONFIG \  	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \  	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -250,6 +369,22 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)  						golden_settings_gc_10_0_nv10,  						(const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10));  		break; +	case CHIP_NAVI14: +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_1_1, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_1_nv14, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); +		break; +	case CHIP_NAVI12: +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_1_2, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_1_2_nv12, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); +		break;  	default:  		break;  	} @@ -331,7 +466,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)  		if (amdgpu_emu_mode == 1)  			msleep(1);  		else -			DRM_UDELAY(1); +			udelay(1);  	}  	if (i < adev->usec_timeout) {  		if (amdgpu_emu_mode == 1) @@ -464,7 +599,8 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)  static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  {  	const char *chip_name; -	char fw_name[30]; +	char fw_name[40]; +	char wks[10];  	int err;  	struct amdgpu_firmware_info *info = NULL;  	const struct common_firmware_header *header = NULL; @@ -477,15 +613,25 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	DRM_DEBUG("\n"); +	memset(wks, 0, sizeof(wks));  	switch (adev->asic_type) {  	case CHIP_NAVI10:  		chip_name = "navi10";  		break; +	case CHIP_NAVI14: +		chip_name = "navi14"; +		if (!(adev->pdev->device == 0x7340 && +		      adev->pdev->revision != 0x00)) +			snprintf(wks, sizeof(wks), "_wks"); +		break; +	case CHIP_NAVI12: +		chip_name = "navi12"; +		break;  	default:  		BUG();  	} -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp%s.bin", chip_name, wks);  	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -496,7 +642,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me%s.bin", chip_name, wks);  	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -507,7 +653,7 @@ static int 
gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce%s.bin", chip_name, wks);  	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -572,7 +718,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	if (adev->gfx.rlc.is_rlc_v2_1)  		gfx_v10_0_init_rlc_ext_microcode(adev); -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec%s.bin", chip_name, wks);  	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -583,7 +729,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2%s.bin", chip_name, wks);  	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);  	if (!err) {  		err = amdgpu_ucode_validate(adev->gfx.mec2_fw); @@ -1026,6 +1172,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		adev->gfx.config.max_hw_contexts = 8;  		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;  		adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1133,6 +1281,8 @@ static int gfx_v10_0_sw_init(void *handle)  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		adev->gfx.me.num_me = 1;  		adev->gfx.me.num_pipe_per_me = 2;  		adev->gfx.me.num_queue_per_pipe = 1; @@ -1441,8 +1591,36 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)  	}  	nv_grbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); + +	/* Initialize all compute VMIDs to have no GDS, GWS, or OA +	   acccess. These should be enabled by FW for target VMIDs. */ +	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); +	}  } +static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) +{ +	int vmid; + +	/* +	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA +	 * access. Compute VMIDs should be enabled by FW for target VMIDs, +	 * the driver can enable them for graphics. VMID0 should maintain +	 * access so that HWS firmware can save/restore entries. 
+	 */ +	for (vmid = 1; vmid < 16; vmid++) { +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); +	} +} + +  static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)  {  	int i, j, k; @@ -1452,7 +1630,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)  	u32 utcl_invreq_disable = 0;  	/*  	 * GCRD_TARGETS_DISABLE field contains -	 * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0] +	 * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0] +	 * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]  	 */  	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(  		2 * max_wgp_per_sh + /* TCP */ @@ -1460,7 +1639,8 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)  		4); /* GL1C */  	/*  	 * UTCL1_UTCL0_INVREQ_DISABLE field contains -	 * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] +	 * for Navi10Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0] +	 * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]  	 */  	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(  		2 * max_wgp_per_sh + /* TCP */ @@ -1468,7 +1648,9 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)  		4 + /* RMI */  		1); /* SQG */ -	if (adev->asic_type == CHIP_NAVI10) { +	if (adev->asic_type == CHIP_NAVI10 || +	    adev->asic_type == CHIP_NAVI14 || +	    adev->asic_type == CHIP_NAVI12) {  		mutex_lock(&adev->grbm_idx_mutex);  		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {  			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -1526,7 +1708,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)  	/* XXX SH_MEM regs */  	/* where to put LDS, scratch, GPUVM in FSA64 space */  	mutex_lock(&adev->srbm_mutex); -	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { +	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {  		nv_grbm_select(adev, 0, 0, 0, i);  		/* CP and shaders */  		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -1543,6 +1725,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)  	mutex_unlock(&adev->srbm_mutex);  	gfx_v10_0_init_compute_vmid(adev); +	gfx_v10_0_init_gds_vmid(adev);  } @@ -1575,9 +1758,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev)  static void gfx_v10_0_init_pg(struct amdgpu_device *adev)  { +	int i; +  	gfx_v10_0_init_csb(adev); -	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); +	for (i = 0; i < adev->num_vmhubs; i++) +		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);  	/* TODO: init power gating */  	return; @@ -1615,9 +1801,9 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,  		 * hence no handshake between SMU & RLC  		 * GFXOFF will be disabled  		 */ -		rlc_pg_cntl |= 0x80000; +		rlc_pg_cntl |= 0x800000;  	} else -		rlc_pg_cntl &= ~0x80000; +		rlc_pg_cntl &= ~0x800000;  	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);  } @@ -3605,20 +3791,12 @@ static int gfx_v10_0_hw_fini(void *handle)  static int gfx_v10_0_suspend(void *handle)  { -	struct amdgpu_device *adev = (struct amdgpu_device *)handle; - -	adev->in_suspend = true; -	return gfx_v10_0_hw_fini(adev); +	return gfx_v10_0_hw_fini(handle);  }  static int gfx_v10_0_resume(void *handle)  { -	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	int r; - -	r = gfx_v10_0_hw_init(adev); -	adev->in_suspend = false; -	return r; +	return 
gfx_v10_0_hw_init(handle);  }  static bool gfx_v10_0_is_idle(void *handle) @@ -4028,6 +4206,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,  	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14:  		if (!enable) {  			amdgpu_gfx_off_ctrl(adev, false);  			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4047,6 +4226,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle,  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		gfx_v10_0_update_gfx_clock_gating(adev,  						 state == AMD_CG_STATE_GATE ? true : false);  		break; @@ -4197,15 +4378,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,  	unsigned vmid = AMDGPU_JOB_GET_VMID(job);  	u32 header, control = 0; -	/* Prevent a hw deadlock due to a wave ID mismatch between ME and GDS. -	 * This resets the wave ID counters. (needed by transform feedback) -	 * TODO: This might only be needed on a VMID switch when we change -	 *       the GDS OA mapping, not sure. -	 */ -	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); -	amdgpu_ring_write(ring, mmVGT_GS_MAX_WAVE_ID); -	amdgpu_ring_write(ring, ring->adev->gds.vgt_gs_max_wave_id); -  	if (ib->flags & AMDGPU_IB_FLAG_CE)  		header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);  	else @@ -4453,7 +4625,7 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)  		if (ring->trail_seq ==  		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))  			break; -		DRM_UDELAY(1); +		udelay(1);  	}  	if (i >= adev->usec_timeout) { @@ -4611,6 +4783,7 @@ gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,  		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,  					    TIME_STAMP_INT_ENABLE, 0);  		WREG32(cp_int_cntl_reg, cp_int_cntl); +		break;  	case AMDGPU_IRQ_STATE_ENABLE:  		cp_int_cntl = RREG32(cp_int_cntl_reg);  		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, @@ -4926,7 +5099,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v10_0_ring_get_rptr_gfx,  	.get_wptr = gfx_v10_0_ring_get_wptr_gfx,  	.set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -4951,7 +5124,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {  		5 + /* HDP_INVL */  		8 + 8 + /* FENCE x2 */  		2, /* SWITCH_BUFFER */ -	.emit_ib_size =	7, /* gfx_v10_0_ring_emit_ib_gfx */ +	.emit_ib_size =	4, /* gfx_v10_0_ring_emit_ib_gfx */  	.emit_ib = gfx_v10_0_ring_emit_ib_gfx,  	.emit_fence = gfx_v10_0_ring_emit_fence,  	.emit_pipeline_sync = gfx_v10_0_ring_emit_pipeline_sync, @@ -4977,7 +5150,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v10_0_ring_get_rptr_compute,  	.get_wptr = gfx_v10_0_ring_get_wptr_compute,  	.set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5010,7 +5183,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v10_0_ring_get_rptr_compute,  	.get_wptr = gfx_v10_0_ring_get_wptr_compute,  	.set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5087,6 +5260,8 @@ static void 
gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;  		break;  	default: @@ -5102,7 +5277,6 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)  	default:  		adev->gds.gds_size = 0x10000;  		adev->gds.gds_compute_max_wave_id = 0x4ff; -		adev->gds.vgt_gs_max_wave_id = 0x3ff;  		break;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0db9f488da7e..791ba398f007 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1879,6 +1879,33 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)  	}  	cik_srbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); + +	/* Initialize all compute VMIDs to have no GDS, GWS, or OA +	   acccess. These should be enabled by FW for target VMIDs. */ +	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { +		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); +		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); +		WREG32(amdgpu_gds_reg_offset[i].gws, 0); +		WREG32(amdgpu_gds_reg_offset[i].oa, 0); +	} +} + +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ +	int vmid; + +	/* +	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA +	 * access. Compute VMIDs should be enabled by FW for target VMIDs, +	 * the driver can enable them for graphics. VMID0 should maintain +	 * access so that HWS firmware can save/restore entries. +	 */ +	for (vmid = 1; vmid < 16; vmid++) { +		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); +	}  }  static void gfx_v7_0_config_init(struct amdgpu_device *adev) @@ -1959,6 +1986,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)  	mutex_unlock(&adev->srbm_mutex);  	gfx_v7_0_init_compute_vmid(adev); +	gfx_v7_0_init_gds_vmid(adev);  	WREG32(mmSX_DEBUG_1, 0x20); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f401b41ef7c..87dd55e9d72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1321,6 +1321,39 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)  	return 0;  } +static int gfx_v8_0_csb_vram_pin(struct amdgpu_device *adev) +{ +	int r; + +	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); +	if (unlikely(r != 0)) +		return r; + +	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, +			AMDGPU_GEM_DOMAIN_VRAM); +	if (!r) +		adev->gfx.rlc.clear_state_gpu_addr = +			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + +	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + +	return r; +} + +static void gfx_v8_0_csb_vram_unpin(struct amdgpu_device *adev) +{ +	int r; + +	if (!adev->gfx.rlc.clear_state_obj) +		return; + +	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); +	if (likely(r == 0)) { +		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); +		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); +	} +} +  static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)  {  	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -3706,6 +3739,33 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)  	}  	vi_srbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); + +	/* Initialize all compute VMIDs to have no GDS, GWS, or OA +	   acccess. 
These should be enabled by FW for target VMIDs. */ +	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { +		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); +		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); +		WREG32(amdgpu_gds_reg_offset[i].gws, 0); +		WREG32(amdgpu_gds_reg_offset[i].oa, 0); +	} +} + +static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) +{ +	int vmid; + +	/* +	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA +	 * access. Compute VMIDs should be enabled by FW for target VMIDs, +	 * the driver can enable them for graphics. VMID0 should maintain +	 * access so that HWS firmware can save/restore entries. +	 */ +	for (vmid = 1; vmid < 16; vmid++) { +		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); +		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); +	}  }  static void gfx_v8_0_config_init(struct amdgpu_device *adev) @@ -3774,6 +3834,7 @@ static void gfx_v8_0_constants_init(struct amdgpu_device *adev)  	mutex_unlock(&adev->srbm_mutex);  	gfx_v8_0_init_compute_vmid(adev); +	gfx_v8_0_init_gds_vmid(adev);  	mutex_lock(&adev->grbm_idx_mutex);  	/* @@ -4776,6 +4837,10 @@ static int gfx_v8_0_hw_init(void *handle)  	gfx_v8_0_init_golden_registers(adev);  	gfx_v8_0_constants_init(adev); +	r = gfx_v8_0_csb_vram_pin(adev); +	if (r) +		return r; +  	r = adev->gfx.rlc.funcs->resume(adev);  	if (r)  		return r; @@ -4892,6 +4957,9 @@ static int gfx_v8_0_hw_fini(void *handle)  	else  		pr_err("rlc is busy, skip halt rlc\n");  	amdgpu_gfx_rlc_exit_safe_mode(adev); + +	gfx_v8_0_csb_vram_unpin(adev); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f4c4eea62526..dcadc73bffd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -36,10 +36,10 @@  #include "gc/gc_9_0_offset.h"  #include "gc/gc_9_0_sh_mask.h" +  #include "vega10_enum.h"  #include "hdp/hdp_4_0_offset.h" -#include "soc15.h"  #include "soc15_common.h"  #include "clearstate_gfx9.h"  #include "v9_structs.h" @@ -60,6 +60,9 @@  #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L  #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L +#define mmGCEA_PROBE_MAP                        0x070c +#define mmGCEA_PROBE_MAP_BASE_IDX               0 +  MODULE_FIRMWARE("amdgpu/vega10_ce.bin");  MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");  MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -104,6 +107,397 @@ MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");  MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");  MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); +MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); +MODULE_FIRMWARE("amdgpu/renoir_me.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); +MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); + +#define mmTCP_CHAN_STEER_0_ARCT								0x0b03 +#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0 +#define mmTCP_CHAN_STEER_1_ARCT								0x0b04 +#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0 +#define mmTCP_CHAN_STEER_2_ARCT								0x0b09 +#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0 +#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a +#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0 +#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b +#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX		
					0 +#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c +#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0 + +enum ta_ras_gfx_subblock { +	/*CPC*/ +	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, +	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, +	TA_RAS_BLOCK__GFX_CPC_UCODE, +	TA_RAS_BLOCK__GFX_DC_STATE_ME1, +	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, +	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, +	TA_RAS_BLOCK__GFX_DC_STATE_ME2, +	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, +	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	/* CPF*/ +	TA_RAS_BLOCK__GFX_CPF_INDEX_START, +	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, +	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, +	TA_RAS_BLOCK__GFX_CPF_TAG, +	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, +	/* CPG*/ +	TA_RAS_BLOCK__GFX_CPG_INDEX_START, +	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, +	TA_RAS_BLOCK__GFX_CPG_DMA_TAG, +	TA_RAS_BLOCK__GFX_CPG_TAG, +	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, +	/* GDS*/ +	TA_RAS_BLOCK__GFX_GDS_INDEX_START, +	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, +	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, +	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, +	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, +	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	/* SPI*/ +	TA_RAS_BLOCK__GFX_SPI_SR_MEM, +	/* SQ*/ +	TA_RAS_BLOCK__GFX_SQ_INDEX_START, +	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, +	TA_RAS_BLOCK__GFX_SQ_LDS_D, +	TA_RAS_BLOCK__GFX_SQ_LDS_I, +	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ +	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, +	/* SQC (3 ranges)*/ +	TA_RAS_BLOCK__GFX_SQC_INDEX_START, +	/* SQC range 0*/ +	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, +	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = +		TA_RAS_BLOCK__GFX_SQC_INDEX0_START, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, +	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	TA_RAS_BLOCK__GFX_SQC_INDEX0_END = +		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	/* SQC range 1*/ +	TA_RAS_BLOCK__GFX_SQC_INDEX1_START, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = +		TA_RAS_BLOCK__GFX_SQC_INDEX1_START, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	TA_RAS_BLOCK__GFX_SQC_INDEX1_END = +		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	/* SQC range 2*/ +	TA_RAS_BLOCK__GFX_SQC_INDEX2_START, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = +		TA_RAS_BLOCK__GFX_SQC_INDEX2_START, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, +	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	TA_RAS_BLOCK__GFX_SQC_INDEX2_END = +		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	TA_RAS_BLOCK__GFX_SQC_INDEX_END = 
TA_RAS_BLOCK__GFX_SQC_INDEX2_END, +	/* TA*/ +	TA_RAS_BLOCK__GFX_TA_INDEX_START, +	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, +	TA_RAS_BLOCK__GFX_TA_FS_AFIFO, +	TA_RAS_BLOCK__GFX_TA_FL_LFIFO, +	TA_RAS_BLOCK__GFX_TA_FX_LFIFO, +	TA_RAS_BLOCK__GFX_TA_FS_CFIFO, +	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, +	/* TCA*/ +	TA_RAS_BLOCK__GFX_TCA_INDEX_START, +	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, +	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	/* TCC (5 sub-ranges)*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX_START, +	/* TCC range 0*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, +	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, +	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, +	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	/* TCC range 1*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX1_START, +	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, +	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	TA_RAS_BLOCK__GFX_TCC_INDEX1_END = +		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	/* TCC range 2*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX2_START, +	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, +	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, +	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, +	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, +	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, +	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, +	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, +	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	TA_RAS_BLOCK__GFX_TCC_INDEX2_END = +		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	/* TCC range 3*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX3_START, +	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, +	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	TA_RAS_BLOCK__GFX_TCC_INDEX3_END = +		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	/* TCC range 4*/ +	TA_RAS_BLOCK__GFX_TCC_INDEX4_START, +	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = +		TA_RAS_BLOCK__GFX_TCC_INDEX4_START, +	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	TA_RAS_BLOCK__GFX_TCC_INDEX4_END = +		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, +	/* TCI*/ +	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, +	/* TCP*/ +	TA_RAS_BLOCK__GFX_TCP_INDEX_START, +	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, +	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, +	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, +	TA_RAS_BLOCK__GFX_TCP_VM_FIFO, +	TA_RAS_BLOCK__GFX_TCP_DB_RAM, +	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, +	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	/* TD*/ +	TA_RAS_BLOCK__GFX_TD_INDEX_START, +	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, +	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, +	TA_RAS_BLOCK__GFX_TD_CS_FIFO, +	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, +	/* EA (3 sub-ranges)*/ +	TA_RAS_BLOCK__GFX_EA_INDEX_START, +	/* EA range 0*/ +	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, +	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, +	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, +	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, +	
TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, +	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, +	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, +	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, +	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	/* EA range 1*/ +	TA_RAS_BLOCK__GFX_EA_INDEX1_START, +	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, +	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, +	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, +	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, +	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, +	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, +	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	/* EA range 2*/ +	TA_RAS_BLOCK__GFX_EA_INDEX2_START, +	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, +	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, +	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, +	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, +	/* UTC VM L2 bank*/ +	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, +	/* UTC VM walker*/ +	TA_RAS_BLOCK__UTC_VML2_WALKER, +	/* UTC ATC L2 2MB cache*/ +	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, +	/* UTC ATC L2 4KB cache*/ +	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, +	TA_RAS_BLOCK__GFX_MAX +}; + +struct ras_gfx_subblock { +	unsigned char *name; +	int ta_subblock; +	int hw_supported_error_type; +	int sw_supported_error_type; +}; + +#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \ +	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \ +		#subblock,                                                     \ +		TA_RAS_BLOCK__##subblock,                                      \ +		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \ +		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \ +	} + +static const struct ras_gfx_subblock ras_gfx_subblocks[] = { +	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 
0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, +			     1), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, +			     0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, +			     0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, +			     1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, +			     1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, +			     1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 
0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, +			     0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, +			     0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), +	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), +}; +  static const struct soc15_reg_golden golden_settings_gc_9_0[] =  {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), @@ -227,6 +621,22 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),  }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 
0x00014104), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), +}; +  static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =  {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), @@ -271,6 +681,18 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)  }; +static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), +}; +  static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =  {  	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, @@ -310,19 +732,21 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);  static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);  static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);  static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, +					  void *ras_error_status); +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, +				     void *inject_if);  static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_VEGA10: -		if (!amdgpu_virt_support_skip_setting(adev)) { -			soc15_program_register_sequence(adev, -							 golden_settings_gc_9_0, -							 ARRAY_SIZE(golden_settings_gc_9_0)); -			soc15_program_register_sequence(adev, -							 golden_settings_gc_9_0_vg10, -							 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); -		} +		soc15_program_register_sequence(adev, +						golden_settings_gc_9_0, +						ARRAY_SIZE(golden_settings_gc_9_0)); +		soc15_program_register_sequence(adev, +						golden_settings_gc_9_0_vg10, +						ARRAY_SIZE(golden_settings_gc_9_0_vg10));  		break;  	case CHIP_VEGA12:  		soc15_program_register_sequence(adev, @@ -340,6 +764,11 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)  						golden_settings_gc_9_0_vg20,  						ARRAY_SIZE(golden_settings_gc_9_0_vg20));  		break; +	case CHIP_ARCTURUS: +		soc15_program_register_sequence(adev, +						
golden_settings_gc_9_4_1_arct, +						ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); +		break;  	case CHIP_RAVEN:  		soc15_program_register_sequence(adev, golden_settings_gc_9_1,  						ARRAY_SIZE(golden_settings_gc_9_1)); @@ -352,12 +781,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)  							golden_settings_gc_9_1_rv1,  							ARRAY_SIZE(golden_settings_gc_9_1_rv1));  		break; +	 case CHIP_RENOIR: +		soc15_program_register_sequence(adev, +						golden_settings_gc_9_1_rn, +						ARRAY_SIZE(golden_settings_gc_9_1_rn)); +		return; /* for renoir, don't need common goldensetting */  	default:  		break;  	} -	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, -					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); +	if (adev->asic_type != CHIP_ARCTURUS) +		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, +						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));  }  static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) @@ -596,58 +1031,32 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)  	case CHIP_VEGA20:  		break;  	case CHIP_RAVEN: -		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) -			break; -		if ((adev->gfx.rlc_fw_version != 106 && -		     adev->gfx.rlc_fw_version < 531) || -		    (adev->gfx.rlc_fw_version == 53815) || -		    (adev->gfx.rlc_feature_version < 1) || -		    !adev->gfx.rlc.is_rlc_v2_1) +		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) +			&&((adev->gfx.rlc_fw_version != 106 && +			     adev->gfx.rlc_fw_version < 531) || +			    (adev->gfx.rlc_fw_version == 53815) || +			    (adev->gfx.rlc_feature_version < 1) || +			    !adev->gfx.rlc.is_rlc_v2_1))  			adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + +		if (adev->pm.pp_feature & PP_GFXOFF_MASK) +			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | +				AMD_PG_SUPPORT_CP | +				AMD_PG_SUPPORT_RLC_SMU_HS;  		break;  	default:  		break;  	}  } -static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, +					  const char *chip_name)  { -	const char *chip_name;  	char fw_name[30];  	int err;  	struct amdgpu_firmware_info *info = NULL;  	const struct common_firmware_header *header = NULL;  	const struct gfx_firmware_header_v1_0 *cp_hdr; -	const struct rlc_firmware_header_v2_0 *rlc_hdr; -	unsigned int *tmp = NULL; -	unsigned int i = 0; -	uint16_t version_major; -	uint16_t version_minor; -	uint32_t smu_version; - -	DRM_DEBUG("\n"); - -	switch (adev->asic_type) { -	case CHIP_VEGA10: -		chip_name = "vega10"; -		break; -	case CHIP_VEGA12: -		chip_name = "vega12"; -		break; -	case CHIP_VEGA20: -		chip_name = "vega20"; -		break; -	case CHIP_RAVEN: -		if (adev->rev_id >= 8) -			chip_name = "raven2"; -		else if (adev->pdev->device == 0x15d8) -			chip_name = "picasso"; -		else -			chip_name = "raven"; -		break; -	default: -		BUG(); -	}  	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);  	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -682,6 +1091,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; +		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; +		info->fw = adev->gfx.pfp_fw; +		header = (const struct common_firmware_header *)info->fw->data; +		
adev->firmware.fw_size += +			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + +		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; +		info->ucode_id = AMDGPU_UCODE_ID_CP_ME; +		info->fw = adev->gfx.me_fw; +		header = (const struct common_firmware_header *)info->fw->data; +		adev->firmware.fw_size += +			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + +		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; +		info->ucode_id = AMDGPU_UCODE_ID_CP_CE; +		info->fw = adev->gfx.ce_fw; +		header = (const struct common_firmware_header *)info->fw->data; +		adev->firmware.fw_size += +			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); +	} + +out: +	if (err) { +		dev_err(adev->dev, +			"gfx9: Failed to load firmware \"%s\"\n", +			fw_name); +		release_firmware(adev->gfx.pfp_fw); +		adev->gfx.pfp_fw = NULL; +		release_firmware(adev->gfx.me_fw); +		adev->gfx.me_fw = NULL; +		release_firmware(adev->gfx.ce_fw); +		adev->gfx.ce_fw = NULL; +	} +	return err; +} + +static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, +					  const char *chip_name) +{ +	char fw_name[30]; +	int err; +	struct amdgpu_firmware_info *info = NULL; +	const struct common_firmware_header *header = NULL; +	const struct rlc_firmware_header_v2_0 *rlc_hdr; +	unsigned int *tmp = NULL; +	unsigned int i = 0; +	uint16_t version_major; +	uint16_t version_minor; +	uint32_t smu_version; +  	/*  	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin  	 * instead of picasso_rlc.bin. @@ -756,57 +1217,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  	if (adev->gfx.rlc.is_rlc_v2_1)  		gfx_v9_0_init_rlc_ext_microcode(adev); -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); -	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); -	if (err) -		goto out; -	err = amdgpu_ucode_validate(adev->gfx.mec_fw); -	if (err) -		goto out; -	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; -	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); -	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - -	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); -	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); -	if (!err) { -		err = amdgpu_ucode_validate(adev->gfx.mec2_fw); -		if (err) -			goto out; -		cp_hdr = (const struct gfx_firmware_header_v1_0 *) -		adev->gfx.mec2_fw->data; -		adev->gfx.mec2_fw_version = -		le32_to_cpu(cp_hdr->header.ucode_version); -		adev->gfx.mec2_feature_version = -		le32_to_cpu(cp_hdr->ucode_feature_version); -	} else { -		err = 0; -		adev->gfx.mec2_fw = NULL; -	} -  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { -		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; -		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; -		info->fw = adev->gfx.pfp_fw; -		header = (const struct common_firmware_header *)info->fw->data; -		adev->firmware.fw_size += -			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - -		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; -		info->ucode_id = AMDGPU_UCODE_ID_CP_ME; -		info->fw = adev->gfx.me_fw; -		header = (const struct common_firmware_header *)info->fw->data; -		adev->firmware.fw_size += -			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - -		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; -		info->ucode_id = AMDGPU_UCODE_ID_CP_CE; -		info->fw = adev->gfx.ce_fw; -		header = (const struct common_firmware_header *)info->fw->data; -		adev->firmware.fw_size += -			
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); -  		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];  		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;  		info->fw = adev->gfx.rlc_fw; @@ -836,7 +1247,58 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  			adev->firmware.fw_size +=  				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);  		} +	} + +out: +	if (err) { +		dev_err(adev->dev, +			"gfx9: Failed to load firmware \"%s\"\n", +			fw_name); +		release_firmware(adev->gfx.rlc_fw); +		adev->gfx.rlc_fw = NULL; +	} +	return err; +} + +static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, +					  const char *chip_name) +{ +	char fw_name[30]; +	int err; +	struct amdgpu_firmware_info *info = NULL; +	const struct common_firmware_header *header = NULL; +	const struct gfx_firmware_header_v1_0 *cp_hdr; + +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); +	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); +	if (err) +		goto out; +	err = amdgpu_ucode_validate(adev->gfx.mec_fw); +	if (err) +		goto out; +	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; +	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); +	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); +	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); +	if (!err) { +		err = amdgpu_ucode_validate(adev->gfx.mec2_fw); +		if (err) +			goto out; +		cp_hdr = (const struct gfx_firmware_header_v1_0 *) +		adev->gfx.mec2_fw->data; +		adev->gfx.mec2_fw_version = +		le32_to_cpu(cp_hdr->header.ucode_version); +		adev->gfx.mec2_feature_version = +		le32_to_cpu(cp_hdr->ucode_feature_version); +	} else { +		err = 0; +		adev->gfx.mec2_fw = NULL; +	} + +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {  		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];  		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;  		info->fw = adev->gfx.mec_fw; @@ -859,13 +1321,18 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;  			adev->firmware.fw_size +=  				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); -			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; -			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; -			info->fw = adev->gfx.mec2_fw; -			adev->firmware.fw_size += -				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); -		} +			/* TODO: Determine if MEC2 JT FW loading can be removed +				 for all GFX V9 asic and above */ +			if (adev->asic_type != CHIP_ARCTURUS) { +				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; +				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; +				info->fw = adev->gfx.mec2_fw; +				adev->firmware.fw_size += +					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, +					PAGE_SIZE); +			} +		}  	}  out: @@ -875,14 +1342,6 @@ out:  		dev_err(adev->dev,  			"gfx9: Failed to load firmware \"%s\"\n",  			fw_name); -		release_firmware(adev->gfx.pfp_fw); -		adev->gfx.pfp_fw = NULL; -		release_firmware(adev->gfx.me_fw); -		adev->gfx.me_fw = NULL; -		release_firmware(adev->gfx.ce_fw); -		adev->gfx.ce_fw = NULL; -		release_firmware(adev->gfx.rlc_fw); -		adev->gfx.rlc_fw = NULL;  		release_firmware(adev->gfx.mec_fw);  		adev->gfx.mec_fw = NULL;  		release_firmware(adev->gfx.mec2_fw); @@ -891,6 +1350,59 @@ out:  	return err;  } +static int gfx_v9_0_init_microcode(struct amdgpu_device 
*adev) +{ +	const char *chip_name; +	int r; + +	DRM_DEBUG("\n"); + +	switch (adev->asic_type) { +	case CHIP_VEGA10: +		chip_name = "vega10"; +		break; +	case CHIP_VEGA12: +		chip_name = "vega12"; +		break; +	case CHIP_VEGA20: +		chip_name = "vega20"; +		break; +	case CHIP_RAVEN: +		if (adev->rev_id >= 8) +			chip_name = "raven2"; +		else if (adev->pdev->device == 0x15d8) +			chip_name = "picasso"; +		else +			chip_name = "raven"; +		break; +	case CHIP_ARCTURUS: +		chip_name = "arcturus"; +		break; +	case CHIP_RENOIR: +		chip_name = "renoir"; +		break; +	default: +		BUG(); +	} + +	/* No CPG in Arcturus */ +	if (adev->asic_type != CHIP_ARCTURUS) { +		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); +		if (r) +			return r; +	} + +	r = gfx_v9_0_init_rlc_microcode(adev, chip_name); +	if (r) +		return r; + +	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); +	if (r) +		return r; + +	return r; +} +  static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)  {  	u32 count = 0; @@ -1128,7 +1640,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)  			return r;  	} -	if (adev->asic_type == CHIP_RAVEN) { +	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {  		/* TODO: double check the cp_table_size for RV */  		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */  		r = amdgpu_gfx_rlc_init_cpt(adev); @@ -1324,7 +1836,9 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {  	.read_wave_data = &gfx_v9_0_read_wave_data,  	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,  	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, -	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q +	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, +	.ras_error_inject = &gfx_v9_0_ras_error_inject, +	.query_ras_error_count = &gfx_v9_0_query_ras_error_count  };  static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -1377,6 +1891,26 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)  		else  			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;  		break; +	case CHIP_ARCTURUS: +		adev->gfx.config.max_hw_contexts = 8; +		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; +		adev->gfx.config.sc_prim_fifo_size_backend = 0x100; +		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; +		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; +		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); +		gb_addr_config &= ~0xf3e777ff; +		gb_addr_config |= 0x22014042; +		break; +	case CHIP_RENOIR: +		adev->gfx.config.max_hw_contexts = 8; +		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; +		adev->gfx.config.sc_prim_fifo_size_backend = 0x100; +		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; +		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; +		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); +		gb_addr_config &= ~0xf3e777ff; +		gb_addr_config |= 0x22010042; +		break;  	default:  		BUG();  		break; @@ -1653,6 +2187,8 @@ static int gfx_v9_0_sw_init(void *handle)  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: +	case CHIP_ARCTURUS: +	case CHIP_RENOIR:  		adev->gfx.mec.num_mec = 2;  		break;  	default: @@ -1810,7 +2346,7 @@ static int gfx_v9_0_sw_fini(void *handle)  	gfx_v9_0_mec_fini(adev);  	gfx_v9_0_ngg_fini(adev);  	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); -	if (adev->asic_type == CHIP_RAVEN) { +	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {  		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,  				&adev->gfx.rlc.cp_table_gpu_addr,  				(void **)&adev->gfx.rlc.cp_table_ptr); @@ -1918,6 +2454,33 @@ 
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)  	}  	soc15_grbm_select(adev, 0, 0, 0, 0);  	mutex_unlock(&adev->srbm_mutex); + +	/* Initialize all compute VMIDs to have no GDS, GWS, or OA +	   acccess. These should be enabled by FW for target VMIDs. */ +	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); +	} +} + +static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) +{ +	int vmid; + +	/* +	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA +	 * access. Compute VMIDs should be enabled by FW for target VMIDs, +	 * the driver can enable them for graphics. VMID0 should maintain +	 * access so that HWS firmware can save/restore entries. +	 */ +	for (vmid = 1; vmid < 16; vmid++) { +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); +		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); +	}  }  static void gfx_v9_0_constants_init(struct amdgpu_device *adev) @@ -1936,7 +2499,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)  	/* XXX SH_MEM regs */  	/* where to put LDS, scratch, GPUVM in FSA64 space */  	mutex_lock(&adev->srbm_mutex); -	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { +	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {  		soc15_grbm_select(adev, 0, 0, 0, i);  		/* CP and shaders */  		if (i == 0) { @@ -1964,6 +2527,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)  	mutex_unlock(&adev->srbm_mutex);  	gfx_v9_0_init_compute_vmid(adev); +	gfx_v9_0_init_gds_vmid(adev);  }  static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2840,6 +3404,10 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)  	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;  	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;  	mqd->compute_static_thread_mgmt_se3 = 0xffffffff; +	mqd->compute_static_thread_mgmt_se4 = 0xffffffff; +	mqd->compute_static_thread_mgmt_se5 = 0xffffffff; +	mqd->compute_static_thread_mgmt_se6 = 0xffffffff; +	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;  	mqd->compute_misc_reserved = 0x00000003;  	mqd->dynamic_cu_mask_addr_lo = @@ -3243,10 +3811,12 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  		gfx_v9_0_enable_gui_idle_interrupt(adev, false);  	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -		/* legacy firmware loading */ -		r = gfx_v9_0_cp_gfx_load_microcode(adev); -		if (r) -			return r; +		if (adev->asic_type != CHIP_ARCTURUS) { +			/* legacy firmware loading */ +			r = gfx_v9_0_cp_gfx_load_microcode(adev); +			if (r) +				return r; +		}  		r = gfx_v9_0_cp_compute_load_microcode(adev);  		if (r) @@ -3257,18 +3827,22 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  	if (r)  		return r; -	r = gfx_v9_0_cp_gfx_resume(adev); -	if (r) -		return r; +	if (adev->asic_type != CHIP_ARCTURUS) { +		r = gfx_v9_0_cp_gfx_resume(adev); +		if (r) +			return r; +	}  	r = gfx_v9_0_kcq_resume(adev);  	if (r)  		return r; -	ring = &adev->gfx.gfx_ring[0]; -	r = amdgpu_ring_test_helper(ring); -	if (r) -		return r; +	if (adev->asic_type != CHIP_ARCTURUS) { +		ring = &adev->gfx.gfx_ring[0]; +		r = amdgpu_ring_test_helper(ring); +		if (r) +			return r; +	}  	for 
(i = 0; i < adev->gfx.num_compute_rings; i++) {  		ring = &adev->gfx.compute_ring[i]; @@ -3282,7 +3856,8 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)  { -	gfx_v9_0_cp_gfx_enable(adev, enable); +	if (adev->asic_type != CHIP_ARCTURUS) +		gfx_v9_0_cp_gfx_enable(adev, enable);  	gfx_v9_0_cp_compute_enable(adev, enable);  } @@ -3291,7 +3866,8 @@ static int gfx_v9_0_hw_init(void *handle)  	int r;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	gfx_v9_0_init_golden_registers(adev); +	if (!amdgpu_sriov_vf(adev)) +		gfx_v9_0_init_golden_registers(adev);  	gfx_v9_0_constants_init(adev); @@ -3307,9 +3883,11 @@ static int gfx_v9_0_hw_init(void *handle)  	if (r)  		return r; -	r = gfx_v9_0_ngg_en(adev); -	if (r) -		return r; +	if (adev->asic_type != CHIP_ARCTURUS) { +		r = gfx_v9_0_ngg_en(adev); +		if (r) +			return r; +	}  	return r;  } @@ -3457,8 +4035,9 @@ static int gfx_v9_0_soft_reset(void *handle)  		/* stop the rlc */  		adev->gfx.rlc.funcs->stop(adev); -		/* Disable GFX parsing/prefetching */ -		gfx_v9_0_cp_gfx_enable(adev, false); +		if (adev->asic_type != CHIP_ARCTURUS) +			/* Disable GFX parsing/prefetching */ +			gfx_v9_0_cp_gfx_enable(adev, false);  		/* Disable MEC parsing/prefetching */  		gfx_v9_0_cp_compute_enable(adev, false); @@ -3801,7 +4380,10 @@ static int gfx_v9_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; +	if (adev->asic_type == CHIP_ARCTURUS) +		adev->gfx.num_gfx_rings = 0; +	else +		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;  	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;  	gfx_v9_0_set_ring_funcs(adev);  	gfx_v9_0_set_irq_funcs(adev); @@ -3812,6 +4394,7 @@ static int gfx_v9_0_early_init(void *handle)  }  static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, +		struct ras_err_data *err_data,  		struct amdgpu_iv_entry *entry);  static int gfx_v9_0_ecc_late_init(void *handle) @@ -3977,6 +4560,9 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,  {  	amdgpu_gfx_rlc_enter_safe_mode(adev); +	if (is_support_sw_smu(adev) && !enable) +		smu_set_gfx_cgpg(&adev->smu, enable); +  	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {  		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);  		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) @@ -4088,6 +4674,9 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,  {  	uint32_t data, def; +	if (adev->asic_type == CHIP_ARCTURUS) +		return; +  	amdgpu_gfx_rlc_enter_safe_mode(adev);  	/* Enable 3D CGCG/CGLS */ @@ -4153,8 +4742,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev  		/* enable cgcg FSM(0x0000363F) */  		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); -		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | -			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; +		if (adev->asic_type == CHIP_ARCTURUS) +			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | +				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; +		else +			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | +				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;  		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)  			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |  				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; @@ -4226,6 +4819,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,  	switch (adev->asic_type) {  	case CHIP_RAVEN: +	
case CHIP_RENOIR:  		if (!enable) {  			amdgpu_gfx_off_ctrl(adev, false);  			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4244,6 +4838,8 @@ static int gfx_v9_0_set_powergating_state(void *handle,  			gfx_v9_0_enable_cp_power_gating(adev, false);  		/* update gfx cgpg state */ +		if (is_support_sw_smu(adev) && enable) +			smu_set_gfx_cgpg(&adev->smu, enable);  		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);  		/* update mgcg state */ @@ -4280,6 +4876,8 @@ static int gfx_v9_0_set_clockgating_state(void *handle,  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: +	case CHIP_ARCTURUS: +	case CHIP_RENOIR:  		gfx_v9_0_update_gfx_clock_gating(adev,  						 state == AMD_CG_STATE_GATE ? true : false);  		break; @@ -4321,14 +4919,16 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)  	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)  		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; -	/* AMD_CG_SUPPORT_GFX_3D_CGCG */ -	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); -	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) -		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; +	if (adev->asic_type != CHIP_ARCTURUS) { +		/* AMD_CG_SUPPORT_GFX_3D_CGCG */ +		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); +		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) +			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; -	/* AMD_CG_SUPPORT_GFX_3D_CGLS */ -	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) -		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +		/* AMD_CG_SUPPORT_GFX_3D_CGLS */ +		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) +			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +	}  }  static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) @@ -4860,7 +5460,7 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)  	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);  	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);  	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); -	WREG32(mmSQ_CMD, value); +	WREG32_SOC15(GC, 0, mmSQ_CMD, value);  }  static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -5124,12 +5724,423 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,  }  static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, +		struct ras_err_data *err_data,  		struct amdgpu_iv_entry *entry)  {  	/* TODO ue will trigger an interrupt. 
*/  	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); +	if (adev->gfx.funcs->query_ras_error_count) +		adev->gfx.funcs->query_ras_error_count(adev, err_data);  	amdgpu_ras_reset_gpu(adev, 0); -	return AMDGPU_RAS_UE; +	return AMDGPU_RAS_SUCCESS; +} + +static const struct { +	const char *name; +	uint32_t ip; +	uint32_t inst; +	uint32_t seg; +	uint32_t reg_offset; +	uint32_t per_se_instance; +	int32_t num_instance; +	uint32_t sec_count_mask; +	uint32_t ded_count_mask; +} gfx_ras_edc_regs[] = { +	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, +	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), +	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, +	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, +	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), +	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, +	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, +	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, +	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, +	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, +	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, +	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), +	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, +	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, +	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, +	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, +	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), +	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, +	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, +	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), +	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, +	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, +	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, +	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, +	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, +	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, +	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, +	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, +	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, +	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), +	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, +	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, +	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, +	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, +	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, +	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), +	  
REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, +	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", +	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), +	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, +	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, +	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, +	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, +	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, +	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, +	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, +	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, +	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, +	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, +	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, +	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, +	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), +	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, +	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), +	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, +	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), +	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, +	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), +	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, +	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), +	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, +	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, +	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, +	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, +	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, +	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, +	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, +	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, +	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, +	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, +	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, +	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, +	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), +	  0, 16, 
REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), +	  0 }, +	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, +	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, +	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), +	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), +	  0 }, +	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, +	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, +	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, +	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, +	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, +	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, +	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, +	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, +	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, +	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, +	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), +	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, +	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), +	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, +	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), +	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, +	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, +	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, +	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, +	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, +	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, +	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, +	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, +	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), +	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, +	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), +	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, 
DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, +	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, +	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), +	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, +	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, +	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), +	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, +	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, +	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, +	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, +	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, +	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, +	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", +	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, +	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), +	  0 }, +	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", +	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, +	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, +	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, +	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, +	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, +	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, +	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), +	  
REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, +	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), +	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, +	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", +	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, +	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), +	  0 }, +	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, +	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, +	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", +	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, +	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, +	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, +	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, +	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, +	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, +	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, +	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, +	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, +	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, +	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, +	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, +	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, +	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, +	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, +	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, +	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, +	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 
0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, +	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, +	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, +	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, +	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, +}; + +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, +				     void *inject_if) +{ +	struct ras_inject_if *info = (struct ras_inject_if *)inject_if; +	int ret; +	struct ta_ras_trigger_error_input block_info = { 0 }; + +	if (adev->asic_type != CHIP_VEGA20) +		return -EINVAL; + +	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) +		return -EINVAL; + +	if (!ras_gfx_subblocks[info->head.sub_block_index].name) +		return -EPERM; + +	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & +	      info->head.type)) { +		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", +			ras_gfx_subblocks[info->head.sub_block_index].name, +			info->head.type); +		return -EPERM; +	} + +	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & +	      info->head.type)) { +		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", +			ras_gfx_subblocks[info->head.sub_block_index].name, +			info->head.type); +		return -EPERM; +	} + +	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); +	block_info.sub_block_index = +		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; +	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); +	block_info.address = info->address; +	block_info.value = info->value; + +	mutex_lock(&adev->grbm_idx_mutex); +	ret = psp_ras_trigger_error(&adev->psp, &block_info); +	mutex_unlock(&adev->grbm_idx_mutex); + +	return ret; +} + +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, +					  void *ras_error_status) +{ +	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; +	uint32_t sec_count, ded_count; +	uint32_t i; +	uint32_t reg_value; +	uint32_t se_id, instance_id; + +	if (adev->asic_type != CHIP_VEGA20) +		return -EINVAL; + +	err_data->ue_count = 0; +	err_data->ce_count = 0; + +	mutex_lock(&adev->grbm_idx_mutex); +	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { +		for (instance_id = 0; instance_id < 256; instance_id++) { +			for (i = 0; +			     i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); +			     i++) { +				if (se_id != 0 && +				    !gfx_ras_edc_regs[i].per_se_instance) +					continue; +				if (instance_id >= gfx_ras_edc_regs[i].num_instance) +					continue; + +				gfx_v9_0_select_se_sh(adev, se_id, 0, +						      instance_id); + +				reg_value = RREG32( +					adev->reg_offset[gfx_ras_edc_regs[i].ip] +							[gfx_ras_edc_regs[i].inst] +							[gfx_ras_edc_regs[i].seg] + +					gfx_ras_edc_regs[i].reg_offset); +				sec_count = reg_value & +					    gfx_ras_edc_regs[i].sec_count_mask; +				ded_count = reg_value & +					    gfx_ras_edc_regs[i].ded_count_mask; +				if (sec_count) { +					DRM_INFO( +						"Instance[%d][%d]: SubBlock %s, SEC %d\n", +						se_id, instance_id, +						gfx_ras_edc_regs[i].name, +						sec_count); +					err_data->ce_count++; +				} + +				if (ded_count) { +					DRM_INFO( +						"Instance[%d][%d]: SubBlock %s, DED %d\n", +						se_id, instance_id, +						gfx_ras_edc_regs[i].name, +						ded_count); +					
err_data->ue_count++; +				} +			} +		} +	} +	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	mutex_unlock(&adev->grbm_idx_mutex); + +	return 0;  }  static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, @@ -5174,7 +6185,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,  	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,  	.set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -5225,7 +6236,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v9_0_ring_get_rptr_compute,  	.get_wptr = gfx_v9_0_ring_get_wptr_compute,  	.set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5260,7 +6271,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v9_0_ring_get_rptr_compute,  	.get_wptr = gfx_v9_0_ring_get_wptr_compute,  	.set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5340,6 +6351,8 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: +	case CHIP_ARCTURUS: +	case CHIP_RENOIR:  		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;  		break;  	default: @@ -5357,6 +6370,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)  		adev->gds.gds_size = 0x10000;  		break;  	case CHIP_RAVEN: +	case CHIP_ARCTURUS:  		adev->gds.gds_size = 0x1000;  		break;  	default: @@ -5378,6 +6392,9 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)  		else  			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */  		break; +	case CHIP_ARCTURUS: +		adev->gds.gds_compute_max_wave_id = 0xfff; +		break;  	default:  		/* this really depends on the chip */  		adev->gds.gds_compute_max_wave_id = 0x7ff; @@ -5422,12 +6439,21 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  {  	int i, j, k, counter, active_cu_number = 0;  	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; -	unsigned disable_masks[4 * 2]; +	unsigned disable_masks[4 * 4];  	if (!adev || !cu_info)  		return -EINVAL; -	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); +	/* +	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs +	 */ +	if (adev->gfx.config.max_shader_engines * +		adev->gfx.config.max_sh_per_se > 16) +		return -EINVAL; + +	amdgpu_gfx_parse_disable_cu(disable_masks, +				    adev->gfx.config.max_shader_engines, +				    adev->gfx.config.max_sh_per_se);  	mutex_lock(&adev->grbm_idx_mutex);  	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -5436,11 +6462,23 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  			ao_bitmap = 0;  			counter = 0;  			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); -			if (i < 4 && j < 2) -				gfx_v9_0_set_user_cu_inactive_bitmap( -					adev, disable_masks[i * 2 + j]); +			gfx_v9_0_set_user_cu_inactive_bitmap( +				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);  			bitmap = gfx_v9_0_get_cu_active_bitmap(adev); -			cu_info->bitmap[i][j] = bitmap; + +			/* +			 * The bitmap(and ao_cu_bitmap) in cu_info structure is +			 * 4x4 size array, and it's usually suitable for Vega +			 * ASICs which has 4*2 SE/SH layout. 
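The comment continued below describes how an 8*1 SE/SH layout is folded into the existing 4x4 cu_info bitmap via bitmap[se % 4][sh + se / 4]. A minimal standalone sketch of that index arithmetic, assuming up to 8 shader engines and using made-up per-SE active masks purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* 4x4 array like cu_info->bitmap; large enough for 4 SEs x 2 SHs or,
 * with the folding below, 8 SEs x 1 SH. */
static uint32_t bitmap[4][4];

int main(void)
{
	unsigned max_se = 8, max_sh = 1;	/* assumed 8*1 layout */
	unsigned se, sh;
	int i;

	for (se = 0; se < max_se; se++) {
		for (sh = 0; sh < max_sh; sh++) {
			uint32_t active = 0xffffu >> se;	/* fake CU mask */

			/* SE4..SE7 spill into the next column:
			 * SE4,SH0 -> bitmap[0][1], SE5,SH0 -> bitmap[1][1], ... */
			bitmap[se % 4][sh + se / 4] = active;
		}
	}

	for (i = 0; i < 4; i++)
		printf("bitmap[%d] = %08x %08x %08x %08x\n", i,
		       (unsigned)bitmap[i][0], (unsigned)bitmap[i][1],
		       (unsigned)bitmap[i][2], (unsigned)bitmap[i][3]);
	return 0;
}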
+			 * But for Arcturus, SE/SH layout is changed to 8*1. +			 * To mostly reduce the impact, we make it compatible +			 * with current bitmap array as below: +			 *    SE4,SH0 --> bitmap[0][1] +			 *    SE5,SH0 --> bitmap[1][1] +			 *    SE6,SH0 --> bitmap[2][1] +			 *    SE7,SH0 --> bitmap[3][1] +			 */ +			cu_info->bitmap[i % 4][j + i / 4] = bitmap;  			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {  				if (bitmap & mask) { @@ -5453,7 +6491,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  			active_cu_number += counter;  			if (i < 2 && j < 2)  				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); -			cu_info->ao_cu_bitmap[i][j] = ao_bitmap; +			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;  		}  	}  	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 15986748f59f..6ce37ce77d14 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -357,7 +357,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,  void gfxhub_v1_0_init(struct amdgpu_device *adev)  { -	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];  	hub->ctx0_ptb_addr_lo32 =  		SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index d605b4963f8a..8b789f750b72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -140,7 +140,7 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)  	/* XXX for emulation, Refer to closed source code.*/  	tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,  			    L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); -	tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); +	tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);  	tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);  	tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);  	WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); @@ -333,7 +333,7 @@ void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev,  void gfxhub_v2_0_init(struct amdgpu_device *adev)  { -	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];  	hub->ctx0_ptb_addr_lo32 =  		SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5eeb72fcc123..241a4e57cf4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -62,7 +62,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  	struct amdgpu_vmhub *hub;  	u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; -	bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +	bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -70,7 +70,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  		GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; -	bits[AMDGPU_MMHUB] = 
MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +	bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |  		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -81,39 +81,39 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  	switch (state) {  	case AMDGPU_IRQ_STATE_DISABLE:  		/* MM HUB */ -		hub = &adev->vmhub[AMDGPU_MMHUB]; +		hub = &adev->vmhub[AMDGPU_MMHUB_0];  		for (i = 0; i < 16; i++) {  			reg = hub->vm_context0_cntl + i;  			tmp = RREG32(reg); -			tmp &= ~bits[AMDGPU_MMHUB]; +			tmp &= ~bits[AMDGPU_MMHUB_0];  			WREG32(reg, tmp);  		}  		/* GFX HUB */ -		hub = &adev->vmhub[AMDGPU_GFXHUB]; +		hub = &adev->vmhub[AMDGPU_GFXHUB_0];  		for (i = 0; i < 16; i++) {  			reg = hub->vm_context0_cntl + i;  			tmp = RREG32(reg); -			tmp &= ~bits[AMDGPU_GFXHUB]; +			tmp &= ~bits[AMDGPU_GFXHUB_0];  			WREG32(reg, tmp);  		}  		break;  	case AMDGPU_IRQ_STATE_ENABLE:  		/* MM HUB */ -		hub = &adev->vmhub[AMDGPU_MMHUB]; +		hub = &adev->vmhub[AMDGPU_MMHUB_0];  		for (i = 0; i < 16; i++) {  			reg = hub->vm_context0_cntl + i;  			tmp = RREG32(reg); -			tmp |= bits[AMDGPU_MMHUB]; +			tmp |= bits[AMDGPU_MMHUB_0];  			WREG32(reg, tmp);  		}  		/* GFX HUB */ -		hub = &adev->vmhub[AMDGPU_GFXHUB]; +		hub = &adev->vmhub[AMDGPU_GFXHUB_0];  		for (i = 0; i < 16; i++) {  			reg = hub->vm_context0_cntl + i;  			tmp = RREG32(reg); -			tmp |= bits[AMDGPU_GFXHUB]; +			tmp |= bits[AMDGPU_GFXHUB_0];  			WREG32(reg, tmp);  		}  		break; @@ -136,22 +136,53 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,  	addr |= ((u64)entry->src_data[1] & 0xf) << 44;  	if (!amdgpu_sriov_vf(adev)) { +		/* +		 * Issue a dummy read to wait for the status register to +		 * be updated to avoid reading an incorrect value due to +		 * the new fast GRBM interface. +		 */ +		if (entry->vmid_src == AMDGPU_GFXHUB_0) +			RREG32(hub->vm_l2_pro_fault_status); +  		status = RREG32(hub->vm_l2_pro_fault_status);  		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);  	}  	if (printk_ratelimit()) { +		struct amdgpu_task_info task_info; + +		memset(&task_info, 0, sizeof(struct amdgpu_task_info)); +		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); +  		dev_err(adev->dev, -			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", +			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " +			"for process %s pid %d thread %s pid %d)\n",  			entry->vmid_src ? 
"mmhub" : "gfxhub",  			entry->src_id, entry->ring_id, entry->vmid, -			entry->pasid); -		dev_err(adev->dev, "  at page 0x%016llx from %d\n", +			entry->pasid, task_info.process_name, task_info.tgid, +			task_info.task_name, task_info.pid); +		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n",  			addr, entry->client_id); -		if (!amdgpu_sriov_vf(adev)) +		if (!amdgpu_sriov_vf(adev)) {  			dev_err(adev->dev, -				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", +				"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",  				status); +			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", +				REG_GET_FIELD(status, +				GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); +			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", +				REG_GET_FIELD(status, +				GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); +			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", +				REG_GET_FIELD(status, +				GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); +			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", +				REG_GET_FIELD(status, +				GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); +			dev_err(adev->dev, "\t RW: 0x%lx\n", +				REG_GET_FIELD(status, +				GCVM_L2_PROTECTION_FAULT_STATUS, RW)); +		}  	}  	return 0; @@ -206,6 +237,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,  	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); +	/* +	 * Issue a dummy read to wait for the ACK register to be cleared +	 * to avoid a false ACK due to the new fast GRBM interface. +	 */ +	if (vmhub == AMDGPU_GFXHUB_0) +		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); +  	/* Wait for ACK with a delay.*/  	for (i = 0; i < adev->usec_timeout; i++) {  		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); @@ -230,8 +268,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,   *   * Flush the TLB for the requested page table.   
*/ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, -				    uint32_t vmid, uint32_t flush_type) +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +					uint32_t vmhub, uint32_t flush_type)  {  	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;  	struct dma_fence *fence; @@ -244,11 +282,18 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,  	mutex_lock(&adev->mman.gtt_window_lock); -	gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); +	if (vmhub == AMDGPU_MMHUB_0) { +		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); +		mutex_unlock(&adev->mman.gtt_window_lock); +		return; +	} + +	BUG_ON(vmhub != AMDGPU_GFXHUB_0); +  	if (!adev->mman.buffer_funcs_enabled ||  	    !adev->ib_pool_ready ||  	    adev->in_gpu_reset) { -		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); +		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);  		mutex_unlock(&adev->mman.gtt_window_lock);  		return;  	} @@ -313,7 +358,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid  	struct amdgpu_device *adev = ring->adev;  	uint32_t reg; -	if (ring->funcs->vmhub == AMDGPU_GFXHUB) +	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)  		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;  	else  		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -524,6 +569,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)  	if (amdgpu_gart_size == -1) {  		switch (adev->asic_type) {  		case CHIP_NAVI10: +		case CHIP_NAVI14: +		case CHIP_NAVI12:  		default:  			adev->gmc.gart_size = 512ULL << 20;  			break; @@ -590,7 +637,6 @@ static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)  static int gmc_v10_0_sw_init(void *handle)  {  	int r; -	int dma_bits;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	gfxhub_v2_0_init(adev); @@ -601,9 +647,12 @@ static int gmc_v10_0_sw_init(void *handle)  	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12: +		adev->num_vmhubs = 2;  		/*  		 * To fulfill 4-level page support, -		 * vm size is 256TB (48bit), maximum size of Navi10, +		 * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12,  		 * block size 512 (9bit)  		 */  		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -637,26 +686,10 @@ static int gmc_v10_0_sw_init(void *handle)  	else  		adev->gmc.stolen_size = 9 * 1024 *1024; -	/* -	 * Set DMA mask + need_dma32 flags. -	 * PCIE - can handle 44-bits. -	 * IGP - can handle 44-bits -	 * PCI - dma32 for legacy pci gart, 44 bits on navi10 -	 */ -	adev->need_dma32 = false; -	dma_bits = adev->need_dma32 ? 
32 : 44; - -	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); +	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));  	if (r) { -		adev->need_dma32 = true; -		dma_bits = 32;  		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); -	} - -	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); -	if (r) { -		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); -		printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); +		return r;  	}  	r = gmc_v10_0_mc_init(adev); @@ -680,8 +713,8 @@ static int gmc_v10_0_sw_init(void *handle)  	 * amdgpu graphics/compute will use VMIDs 1-7  	 * amdkfd will use VMIDs 8-15  	 */ -	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; -	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; +	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; +	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS;  	amdgpu_vm_manager_init(adev); @@ -717,6 +750,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		break;  	default:  		break; @@ -766,7 +801,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)  	gfxhub_v2_0_set_fault_enable_default(adev, value);  	mmhub_v2_0_set_fault_enable_default(adev, value); -	gmc_v10_0_flush_gpu_tlb(adev, 0, 0); +	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); +	gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);  	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",  		 (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ca8dbe91cc8b..9fb1765e92d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -362,8 +362,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)  	return 0;  } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, -				uint32_t vmid, uint32_t flush_type) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +					uint32_t vmhub, uint32_t flush_type)  {  	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);  } @@ -571,7 +571,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)  	else  		gmc_v6_0_set_fault_enable_default(adev, true); -	gmc_v6_0_flush_gpu_tlb(adev, 0, 0); +	gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);  	dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",  		 (unsigned)(adev->gmc.gart_size >> 20),  		 (unsigned long long)table_addr); @@ -839,9 +839,10 @@ static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)  static int gmc_v6_0_sw_init(void *handle)  {  	int r; -	int dma_bits;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->num_vmhubs = 1; +  	if (adev->flags & AMD_IS_APU) {  		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;  	} else { @@ -862,20 +863,12 @@ static int gmc_v6_0_sw_init(void *handle)  	adev->gmc.mc_mask = 0xffffffffffULL; -	adev->need_dma32 = false; -	dma_bits = adev->need_dma32 ? 
32 : 40; -	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); +	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));  	if (r) { -		adev->need_dma32 = true; -		dma_bits = 32;  		dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); +		return r;  	} -	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); -	if (r) { -		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); -		dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); -	} -	adev->need_swiotlb = drm_need_swiotlb(dma_bits); +	adev->need_swiotlb = drm_need_swiotlb(44);  	r = gmc_v6_0_init_microcode(adev);  	if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 57f80065d57a..0c3d9bc3a641 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -433,8 +433,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)   *   * Flush the TLB for the requested page table (CIK).   */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, -				uint32_t vmid, uint32_t flush_type) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +					uint32_t vmhub, uint32_t flush_type)  {  	/* bits 0-15 are the VM contexts0-15 */  	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -677,7 +677,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)  		WREG32(mmCHUB_CONTROL, tmp);  	} -	gmc_v7_0_flush_gpu_tlb(adev, 0, 0); +	gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);  	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",  		 (unsigned)(adev->gmc.gart_size >> 20),  		 (unsigned long long)table_addr); @@ -959,9 +959,10 @@ static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)  static int gmc_v7_0_sw_init(void *handle)  {  	int r; -	int dma_bits;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->num_vmhubs = 1; +  	if (adev->flags & AMD_IS_APU) {  		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;  	} else { @@ -990,25 +991,12 @@ static int gmc_v7_0_sw_init(void *handle)  	 */  	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ -	/* set DMA mask + need_dma32 flags. -	 * PCIE - can handle 40-bits. -	 * IGP - can handle 40-bits -	 * PCI - dma32 for legacy pci gart, 40 bits on newer asics -	 */ -	adev->need_dma32 = false; -	dma_bits = adev->need_dma32 ? 32 : 40; -	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); +	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));  	if (r) { -		adev->need_dma32 = true; -		dma_bits = 32;  		pr_warn("amdgpu: No suitable DMA available\n"); +		return r;  	} -	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); -	if (r) { -		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); -		pr_warn("amdgpu: No coherent DMA available\n"); -	} -	adev->need_swiotlb = drm_need_swiotlb(dma_bits); +	adev->need_swiotlb = drm_need_swiotlb(40);  	r = gmc_v7_0_init_microcode(adev);  	if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9238280d1ff7..ea764dd9245d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -635,8 +635,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)   *   * Flush the TLB for the requested page table (VI).   
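The gmc_v6/v7 hunks above (and the matching gmc_v8 change that follows) replace the pci_set_dma_mask()/pci_set_consistent_dma_mask() pair, together with its fall-back to a 32-bit mask via need_dma32, with a single dma_set_mask_and_coherent() call whose failure is now simply returned, and pass the fixed bit width straight to drm_need_swiotlb(). For reference, the mask argument is just an n-bit all-ones value; a small sketch using the same arithmetic as the kernel's DMA_BIT_MASK():

#include <stdio.h>

/* Mirrors the kernel's DMA_BIT_MASK(n): an n-bit all-ones mask. */
#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	printf("32-bit DMA mask: 0x%016llx\n", DMA_BIT_MASK(32));
	printf("40-bit DMA mask: 0x%016llx\n", DMA_BIT_MASK(40));
	printf("44-bit DMA mask: 0x%016llx\n", DMA_BIT_MASK(44));
	return 0;
}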
*/ -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, -				uint32_t vmid, uint32_t flush_type) +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +					uint32_t vmhub, uint32_t flush_type)  {  	/* bits 0-15 are the VM contexts0-15 */  	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -921,7 +921,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)  	else  		gmc_v8_0_set_fault_enable_default(adev, true); -	gmc_v8_0_flush_gpu_tlb(adev, 0, 0); +	gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);  	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",  		 (unsigned)(adev->gmc.gart_size >> 20),  		 (unsigned long long)table_addr); @@ -1079,9 +1079,10 @@ static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)  static int gmc_v8_0_sw_init(void *handle)  {  	int r; -	int dma_bits;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->num_vmhubs = 1; +  	if (adev->flags & AMD_IS_APU) {  		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;  	} else { @@ -1116,25 +1117,12 @@ static int gmc_v8_0_sw_init(void *handle)  	 */  	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ -	/* set DMA mask + need_dma32 flags. -	 * PCIE - can handle 40-bits. -	 * IGP - can handle 40-bits -	 * PCI - dma32 for legacy pci gart, 40 bits on newer asics -	 */ -	adev->need_dma32 = false; -	dma_bits = adev->need_dma32 ? 32 : 40; -	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); +	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40));  	if (r) { -		adev->need_dma32 = true; -		dma_bits = 32;  		pr_warn("amdgpu: No suitable DMA available\n"); +		return r;  	} -	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); -	if (r) { -		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); -		pr_warn("amdgpu: No coherent DMA available\n"); -	} -	adev->need_swiotlb = drm_need_swiotlb(dma_bits); +	adev->need_swiotlb = drm_need_swiotlb(40);  	r = gmc_v8_0_init_microcode(adev);  	if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 73f3b79ab131..f91337030dc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -47,7 +47,10 @@  #include "gfxhub_v1_0.h"  #include "mmhub_v1_0.h" +#include "athub_v1_0.h"  #include "gfxhub_v1_1.h" +#include "mmhub_v9_4.h" +#include "umc_v6_1.h"  #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -241,18 +244,30 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,  }  static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, +		struct ras_err_data *err_data,  		struct amdgpu_iv_entry *entry)  {  	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); -	amdgpu_ras_reset_gpu(adev, 0); -	return AMDGPU_RAS_UE; +	if (adev->umc.funcs->query_ras_error_count) +		adev->umc.funcs->query_ras_error_count(adev, err_data); +	/* umc query_ras_error_address is also responsible for clearing +	 * error status +	 */ +	if (adev->umc.funcs->query_ras_error_address) +		adev->umc.funcs->query_ras_error_address(adev, err_data); + +	/* only uncorrectable error needs gpu reset */ +	if (err_data->ue_count) +		amdgpu_ras_reset_gpu(adev, 0); + +	return AMDGPU_RAS_SUCCESS;  }  static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,  		struct amdgpu_irq_src *source,  		struct amdgpu_iv_entry *entry)  { -	struct ras_common_if *ras_if = adev->gmc.ras_if; +	struct ras_common_if *ras_if = adev->gmc.umc_ras_if;  	struct ras_dispatch_if ih_data = {  		.entry = entry,  	}; @@ -284,7 +299,7 @@ static int 
gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  	switch (state) {  	case AMDGPU_IRQ_STATE_DISABLE: -		for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { +		for (j = 0; j < adev->num_vmhubs; j++) {  			hub = &adev->vmhub[j];  			for (i = 0; i < 16; i++) {  				reg = hub->vm_context0_cntl + i; @@ -295,7 +310,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  		}  		break;  	case AMDGPU_IRQ_STATE_ENABLE: -		for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { +		for (j = 0; j < adev->num_vmhubs; j++) {  			hub = &adev->vmhub[j];  			for (i = 0; i < 16; i++) {  				reg = hub->vm_context0_cntl + i; @@ -315,10 +330,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,  				struct amdgpu_irq_src *source,  				struct amdgpu_iv_entry *entry)  { -	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; +	struct amdgpu_vmhub *hub;  	bool retry_fault = !!(entry->src_data[1] & 0x80);  	uint32_t status = 0;  	u64 addr; +	char hub_name[10];  	addr = (u64)entry->src_data[0] << 12;  	addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -327,8 +343,27 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,  						    entry->timestamp))  		return 1; /* This also prevents sending it to KFD */ +	if (entry->client_id == SOC15_IH_CLIENTID_VMC) { +		snprintf(hub_name, sizeof(hub_name), "mmhub0"); +		hub = &adev->vmhub[AMDGPU_MMHUB_0]; +	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { +		snprintf(hub_name, sizeof(hub_name), "mmhub1"); +		hub = &adev->vmhub[AMDGPU_MMHUB_1]; +	} else { +		snprintf(hub_name, sizeof(hub_name), "gfxhub0"); +		hub = &adev->vmhub[AMDGPU_GFXHUB_0]; +	} +  	/* If it's the first fault for this address, process it normally */  	if (!amdgpu_sriov_vf(adev)) { +		/* +		 * Issue a dummy read to wait for the status register to +		 * be updated to avoid reading an incorrect value due to +		 * the new fast GRBM interface. +		 */ +		if (entry->vmid_src == AMDGPU_GFXHUB_0) +			RREG32(hub->vm_l2_pro_fault_status); +  		status = RREG32(hub->vm_l2_pro_fault_status);  		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);  	} @@ -342,17 +377,33 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,  		dev_err(adev->dev,  			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "  			"pasid:%u, for process %s pid %d thread %s pid %d)\n", -			entry->vmid_src ? "mmhub" : "gfxhub", -			retry_fault ? "retry" : "no-retry", +			hub_name, retry_fault ? 
"retry" : "no-retry",  			entry->src_id, entry->ring_id, entry->vmid,  			entry->pasid, task_info.process_name, task_info.tgid,  			task_info.task_name, task_info.pid); -		dev_err(adev->dev, "  in page starting at address 0x%016llx from %d\n", +		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n",  			addr, entry->client_id); -		if (!amdgpu_sriov_vf(adev)) +		if (!amdgpu_sriov_vf(adev)) {  			dev_err(adev->dev,  				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",  				status); +			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", +				REG_GET_FIELD(status, +				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); +			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", +				REG_GET_FIELD(status, +				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); +			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", +				REG_GET_FIELD(status, +				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); +			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", +				REG_GET_FIELD(status, +				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); +			dev_err(adev->dev, "\t RW: 0x%lx\n", +				REG_GET_FIELD(status, +				VM_L2_PROTECTION_FAULT_STATUS, RW)); + +		}  	}  	return 0; @@ -413,44 +464,53 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,   *   * Flush the TLB for the requested page table using certain type.   */ -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, -				uint32_t vmid, uint32_t flush_type) +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +					uint32_t vmhub, uint32_t flush_type)  {  	const unsigned eng = 17; -	unsigned i, j; +	u32 j, tmp; +	struct amdgpu_vmhub *hub; -	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { -		struct amdgpu_vmhub *hub = &adev->vmhub[i]; -		u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); +	BUG_ON(vmhub >= adev->num_vmhubs); -		/* This is necessary for a HW workaround under SRIOV as well -		 * as GFXOFF under bare metal -		 */ -		if (adev->gfx.kiq.ring.sched.ready && -		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && -		    !adev->in_gpu_reset) { -			uint32_t req = hub->vm_inv_eng0_req + eng; -			uint32_t ack = hub->vm_inv_eng0_ack + eng; - -			amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, -							   1 << vmid); -			continue; -		} +	hub = &adev->vmhub[vmhub]; +	tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); -		spin_lock(&adev->gmc.invalidate_lock); -		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); -		for (j = 0; j < adev->usec_timeout; j++) { -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); -			if (tmp & (1 << vmid)) -				break; -			udelay(1); -		} -		spin_unlock(&adev->gmc.invalidate_lock); -		if (j < adev->usec_timeout) -			continue; +	/* This is necessary for a HW workaround under SRIOV as well +	 * as GFXOFF under bare metal +	 */ +	if (adev->gfx.kiq.ring.sched.ready && +			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && +			!adev->in_gpu_reset) { +		uint32_t req = hub->vm_inv_eng0_req + eng; +		uint32_t ack = hub->vm_inv_eng0_ack + eng; + +		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, +				1 << vmid); +		return; +	} + +	spin_lock(&adev->gmc.invalidate_lock); +	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); -		DRM_ERROR("Timeout waiting for VM flush ACK!\n"); +	/* +	 * Issue a dummy read to wait for the ACK register to be cleared +	 * to avoid a false ACK due to the new fast GRBM interface. 
+	 */ +	if (vmhub == AMDGPU_GFXHUB_0) +		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + +	for (j = 0; j < adev->usec_timeout; j++) { +		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); +		if (tmp & (1 << vmid)) +			break; +		udelay(1);  	} +	spin_unlock(&adev->gmc.invalidate_lock); +	if (j < adev->usec_timeout) +		return; + +	DRM_ERROR("Timeout waiting for VM flush ACK!\n");  }  static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -480,7 +540,11 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,  	struct amdgpu_device *adev = ring->adev;  	uint32_t reg; -	if (ring->funcs->vmhub == AMDGPU_GFXHUB) +	/* Do nothing because there's no lut register for mmhub1. */ +	if (ring->funcs->vmhub == AMDGPU_MMHUB_1) +		return; + +	if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)  		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;  	else  		reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -597,12 +661,41 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)  	adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;  } +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) +{ +	switch (adev->asic_type) { +	case CHIP_VEGA20: +		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; +		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; +		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; +		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; +		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; +		adev->umc.funcs = &umc_v6_1_funcs; +		break; +	default: +		break; +	} +} + +static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) +{ +	switch (adev->asic_type) { +	case CHIP_VEGA20: +		adev->mmhub_funcs = &mmhub_v1_0_funcs; +		break; +	default: +		break; +	} +} +  static int gmc_v9_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	gmc_v9_0_set_gmc_funcs(adev);  	gmc_v9_0_set_irq_funcs(adev); +	gmc_v9_0_set_umc_funcs(adev); +	gmc_v9_0_set_mmhub_funcs(adev);  	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;  	adev->gmc.shared_aperture_end = @@ -629,6 +722,8 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_VEGA10:  	case CHIP_RAVEN: +	case CHIP_ARCTURUS: +	case CHIP_RENOIR:  		return true;  	case CHIP_VEGA12:  	case CHIP_VEGA20: @@ -641,7 +736,8 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)  {  	struct amdgpu_ring *ring;  	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = -		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; +		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, +		GFXHUB_FREE_VM_INV_ENGS_BITMAP};  	unsigned i;  	unsigned vmhub, inv_eng; @@ -666,29 +762,28 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)  	return 0;  } -static int gmc_v9_0_ecc_late_init(void *handle) +static int gmc_v9_0_ecc_ras_block_late_init(void *handle, +			struct ras_fs_if *fs_info, struct ras_common_if *ras_block)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	struct ras_common_if **ras_if = &adev->gmc.ras_if; +	struct ras_common_if **ras_if = NULL;  	struct ras_ih_if ih_info = {  		.cb = gmc_v9_0_process_ras_data_cb,  	}; -	struct ras_fs_if fs_info = { -		.sysfs_name = "umc_err_count", -		.debugfs_name = "umc_err_inject", -	}; -	struct ras_common_if ras_block = { -		.block = AMDGPU_RAS_BLOCK__UMC, -		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, -		.sub_block_index = 0, -		.name = "umc", -	};  	int r; -	if 
(!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { -		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); +	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) +		ras_if = &adev->gmc.umc_ras_if; +	else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB) +		ras_if = &adev->gmc.mmhub_ras_if; +	else +		BUG(); + +	if (!amdgpu_ras_is_supported(adev, ras_block->block)) { +		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);  		return 0;  	} +  	/* handle resume path. */  	if (*ras_if) {  		/* resend ras TA enable cmd during resume. @@ -700,7 +795,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)  			if (r == -EAGAIN) {  				/* request a gpu reset. will run again. */  				amdgpu_ras_request_reset_on_boot(adev, -						AMDGPU_RAS_BLOCK__UMC); +						ras_block->block);  				return 0;  			}  			/* fail to enable ras, cleanup all. */ @@ -714,41 +809,46 @@ static int gmc_v9_0_ecc_late_init(void *handle)  	if (!*ras_if)  		return -ENOMEM; -	**ras_if = ras_block; +	**ras_if = *ras_block;  	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);  	if (r) {  		if (r == -EAGAIN) {  			amdgpu_ras_request_reset_on_boot(adev, -					AMDGPU_RAS_BLOCK__UMC); +					ras_block->block);  			r = 0;  		}  		goto feature;  	}  	ih_info.head = **ras_if; -	fs_info.head = **ras_if; +	fs_info->head = **ras_if; -	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); -	if (r) -		goto interrupt; +	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { +		r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); +		if (r) +			goto interrupt; +	} -	amdgpu_ras_debugfs_create(adev, &fs_info); +	amdgpu_ras_debugfs_create(adev, fs_info); -	r = amdgpu_ras_sysfs_create(adev, &fs_info); +	r = amdgpu_ras_sysfs_create(adev, fs_info);  	if (r)  		goto sysfs;  resume: -	r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); -	if (r) -		goto irq; +	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { +		r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); +		if (r) +			goto irq; +	}  	return 0;  irq:  	amdgpu_ras_sysfs_remove(adev, *ras_if);  sysfs:  	amdgpu_ras_debugfs_remove(adev, *ras_if); -	amdgpu_ras_interrupt_remove_handler(adev, &ih_info); +	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) +		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);  interrupt:  	amdgpu_ras_feature_enable(adev, *ras_if, 0);  feature: @@ -757,6 +857,40 @@ feature:  	return r;  } +static int gmc_v9_0_ecc_late_init(void *handle) +{ +	int r; + +	struct ras_fs_if umc_fs_info = { +		.sysfs_name = "umc_err_count", +		.debugfs_name = "umc_err_inject", +	}; +	struct ras_common_if umc_ras_block = { +		.block = AMDGPU_RAS_BLOCK__UMC, +		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, +		.sub_block_index = 0, +		.name = "umc", +	}; +	struct ras_fs_if mmhub_fs_info = { +		.sysfs_name = "mmhub_err_count", +		.debugfs_name = "mmhub_err_inject", +	}; +	struct ras_common_if mmhub_ras_block = { +		.block = AMDGPU_RAS_BLOCK__MMHUB, +		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, +		.sub_block_index = 0, +		.name = "mmhub", +	}; + +	r = gmc_v9_0_ecc_ras_block_late_init(handle, +			&umc_fs_info, &umc_ras_block); +	if (r) +		return r; + +	r = gmc_v9_0_ecc_ras_block_late_init(handle, +			&mmhub_fs_info, &mmhub_ras_block); +	return r; +}  static int gmc_v9_0_late_init(void *handle)  { @@ -806,14 +940,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,  					struct amdgpu_gmc *mc)  {  	u64 base = 0; -	if (!amdgpu_sriov_vf(adev)) + +	if (adev->asic_type == CHIP_ARCTURUS) +		base = mmhub_v9_4_get_fb_location(adev); +	else if (!amdgpu_sriov_vf(adev))  		base = 
mmhub_v1_0_get_fb_location(adev); +  	/* add the xgmi offset of the physical node */  	base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;  	amdgpu_gmc_vram_location(adev, mc, base);  	amdgpu_gmc_gart_location(adev, mc); -	if (!amdgpu_sriov_vf(adev)) -		amdgpu_gmc_agp_location(adev, mc); +	amdgpu_gmc_agp_location(adev, mc);  	/* base offset of vram pages */  	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -887,10 +1024,12 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)  		case CHIP_VEGA10:  /* all engines support GPUVM */  		case CHIP_VEGA12:  /* all engines support GPUVM */  		case CHIP_VEGA20: +		case CHIP_ARCTURUS:  		default:  			adev->gmc.gart_size = 512ULL << 20;  			break;  		case CHIP_RAVEN:   /* DCE SG support */ +		case CHIP_RENOIR:  			adev->gmc.gart_size = 1024ULL << 20;  			break;  		} @@ -923,7 +1062,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)  static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)  { -	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); +	u32 d1vga_control;  	unsigned size;  	/* @@ -933,6 +1072,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)  	if (gmc_v9_0_keep_stolen_memory(adev))  		return 9 * 1024 * 1024; +	d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);  	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {  		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */  	} else { @@ -940,6 +1080,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)  		switch (adev->asic_type) {  		case CHIP_RAVEN: +		case CHIP_RENOIR:  			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);  			size = (REG_GET_FIELD(viewport,  					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * @@ -968,17 +1109,21 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)  static int gmc_v9_0_sw_init(void *handle)  {  	int r; -	int dma_bits;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	gfxhub_v1_0_init(adev); -	mmhub_v1_0_init(adev); +	if (adev->asic_type == CHIP_ARCTURUS) +		mmhub_v9_4_init(adev); +	else +		mmhub_v1_0_init(adev);  	spin_lock_init(&adev->gmc.invalidate_lock);  	adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);  	switch (adev->asic_type) {  	case CHIP_RAVEN: +		adev->num_vmhubs = 2; +  		if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {  			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);  		} else { @@ -991,6 +1136,10 @@ static int gmc_v9_0_sw_init(void *handle)  	case CHIP_VEGA10:  	case CHIP_VEGA12:  	case CHIP_VEGA20: +	case CHIP_RENOIR: +		adev->num_vmhubs = 2; + +  		/*  		 * To fulfill 4-level page support,  		 * vm size is 256TB (48bit), maximum size of Vega10, @@ -1002,6 +1151,12 @@ static int gmc_v9_0_sw_init(void *handle)  		else  			amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);  		break; +	case CHIP_ARCTURUS: +		adev->num_vmhubs = 3; + +		/* Keep the vm size same with Vega20 */ +		amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); +		break;  	default:  		break;  	} @@ -1012,6 +1167,13 @@ static int gmc_v9_0_sw_init(void *handle)  	if (r)  		return r; +	if (adev->asic_type == CHIP_ARCTURUS) { +		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, +					&adev->gmc.vm_fault); +		if (r) +			return r; +	} +  	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,  				&adev->gmc.vm_fault); @@ -1030,25 +1192,12 @@ static int 
gmc_v9_0_sw_init(void *handle)  	 */  	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ -	/* set DMA mask + need_dma32 flags. -	 * PCIE - can handle 44-bits. -	 * IGP - can handle 44-bits -	 * PCI - dma32 for legacy pci gart, 44 bits on vega10 -	 */ -	adev->need_dma32 = false; -	dma_bits = adev->need_dma32 ? 32 : 44; -	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); +	r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));  	if (r) { -		adev->need_dma32 = true; -		dma_bits = 32;  		printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); +		return r;  	} -	r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); -	if (r) { -		pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); -		printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); -	} -	adev->need_swiotlb = drm_need_swiotlb(dma_bits); +	adev->need_swiotlb = drm_need_swiotlb(44);  	if (adev->gmc.xgmi.supported) {  		r = gfxhub_v1_1_get_xgmi_info(adev); @@ -1077,8 +1226,9 @@ static int gmc_v9_0_sw_init(void *handle)  	 * amdgpu graphics/compute will use VMIDs 1-7  	 * amdkfd will use VMIDs 8-15  	 */ -	adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; -	adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; +	adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; +	adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; +	adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS;  	amdgpu_vm_manager_init(adev); @@ -1088,28 +1238,40 @@ static int gmc_v9_0_sw_init(void *handle)  static int gmc_v9_0_sw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	void *stolen_vga_buf;  	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && -			adev->gmc.ras_if) { -		struct ras_common_if *ras_if = adev->gmc.ras_if; +			adev->gmc.umc_ras_if) { +		struct ras_common_if *ras_if = adev->gmc.umc_ras_if;  		struct ras_ih_if ih_info = {  			.head = *ras_if,  		}; -		/*remove fs first*/ +		/* remove fs first */  		amdgpu_ras_debugfs_remove(adev, ras_if);  		amdgpu_ras_sysfs_remove(adev, ras_if); -		/*remove the IH*/ +		/* remove the IH */  		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);  		amdgpu_ras_feature_enable(adev, ras_if, 0);  		kfree(ras_if);  	} +	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && +			adev->gmc.mmhub_ras_if) { +		struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if; + +		/* remove fs and disable ras feature */ +		amdgpu_ras_debugfs_remove(adev, ras_if); +		amdgpu_ras_sysfs_remove(adev, ras_if); +		amdgpu_ras_feature_enable(adev, ras_if, 0); +		kfree(ras_if); +	} +  	amdgpu_gem_force_release(adev);  	amdgpu_vm_manager_fini(adev);  	if (gmc_v9_0_keep_stolen_memory(adev)) -		amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); +		amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf);  	amdgpu_gart_table_vram_free(adev);  	amdgpu_bo_fini(adev); @@ -1123,7 +1285,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_VEGA10: -		if (amdgpu_virt_support_skip_setting(adev)) +		if (amdgpu_sriov_vf(adev))  			break;  		/* fall through */  	case CHIP_VEGA20: @@ -1137,6 +1299,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)  	case CHIP_VEGA12:  		break;  	case CHIP_RAVEN: +		/* TODO for renoir */  		soc15_program_register_sequence(adev,  						golden_settings_athub_1_0_0,  						ARRAY_SIZE(golden_settings_athub_1_0_0)); @@ -1153,7 +1316,7 @@ static void 
gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)   */  static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  { -	int r; +	int r, i;  	bool value;  	u32 tmp; @@ -1171,6 +1334,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_RAVEN: +		/* TODO for renoir */  		mmhub_v1_0_update_power_gating(adev, true);  		break;  	default: @@ -1181,7 +1345,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  	if (r)  		return r; -	r = mmhub_v1_0_gart_enable(adev); +	if (adev->asic_type == CHIP_ARCTURUS) +		r = mmhub_v9_4_gart_enable(adev); +	else +		r = mmhub_v1_0_gart_enable(adev);  	if (r)  		return r; @@ -1202,8 +1369,13 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)  		value = true;  	gfxhub_v1_0_set_fault_enable_default(adev, value); -	mmhub_v1_0_set_fault_enable_default(adev, value); -	gmc_v9_0_flush_gpu_tlb(adev, 0, 0); +	if (adev->asic_type == CHIP_ARCTURUS) +		mmhub_v9_4_set_fault_enable_default(adev, value); +	else +		mmhub_v1_0_set_fault_enable_default(adev, value); + +	for (i = 0; i < adev->num_vmhubs; ++i) +		gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);  	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",  		 (unsigned)(adev->gmc.gart_size >> 20), @@ -1243,7 +1415,10 @@ static int gmc_v9_0_hw_init(void *handle)  static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)  {  	gfxhub_v1_0_gart_disable(adev); -	mmhub_v1_0_gart_disable(adev); +	if (adev->asic_type == CHIP_ARCTURUS) +		mmhub_v9_4_gart_disable(adev); +	else +		mmhub_v1_0_gart_disable(adev);  	amdgpu_gart_table_vram_unpin(adev);  } @@ -1308,14 +1483,26 @@ static int gmc_v9_0_set_clockgating_state(void *handle,  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	return mmhub_v1_0_set_clockgating(adev, state); +	if (adev->asic_type == CHIP_ARCTURUS) +		mmhub_v9_4_set_clockgating(adev, state); +	else +		mmhub_v1_0_set_clockgating(adev, state); + +	athub_v1_0_set_clockgating(adev, state); + +	return 0;  }  static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	mmhub_v1_0_get_clockgating(adev, flags); +	if (adev->asic_type == CHIP_ARCTURUS) +		mmhub_v9_4_get_clockgating(adev, flags); +	else +		mmhub_v1_0_get_clockgating(adev, flags); + +	athub_v1_0_get_clockgating(adev, flags);  }  static int gmc_v9_0_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 5c8deac65580..971c0840358f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -37,4 +37,11 @@  extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;  extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; +/* amdgpu_amdkfd*.c */ +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, +				uint64_t value); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, +				uint64_t value); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, +				uint32_t vmid, uint64_t value);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dc5ce03034d3..04cd4b6f95d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -21,13 +21,13 @@   *   */  #include "amdgpu.h" +#include "amdgpu_ras.h"  #include "mmhub_v1_0.h"  #include "mmhub/mmhub_1_0_offset.h"  #include "mmhub/mmhub_1_0_sh_mask.h"  #include 
"mmhub/mmhub_1_0_default.h" -#include "athub/athub_1_0_offset.h" -#include "athub/athub_1_0_sh_mask.h" +#include "mmhub/mmhub_9_4_0_offset.h"  #include "vega10_enum.h"  #include "soc15_common.h" @@ -35,6 +35,9 @@  #define mmDAGB0_CNTL_MISC2_RV 0x008f  #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 +#define EA_EDC_CNT_MASK 0x3 +#define EA_EDC_CNT_SHIFT 0x2 +  u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)  {  	u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); @@ -111,7 +114,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)  		WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,  			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); -	if (amdgpu_virt_support_skip_setting(adev)) +	if (amdgpu_sriov_vf(adev))  		return;  	/* Set default page address. */ @@ -159,7 +162,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)  {  	uint32_t tmp; -	if (amdgpu_virt_support_skip_setting(adev)) +	if (amdgpu_sriov_vf(adev))  		return;  	/* Setup L2 cache */ @@ -208,7 +211,7 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)  static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)  { -	if (amdgpu_virt_support_skip_setting(adev)) +	if (amdgpu_sriov_vf(adev))  		return;  	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, @@ -348,7 +351,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)  				0);  	WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); -	if (!amdgpu_virt_support_skip_setting(adev)) { +	if (!amdgpu_sriov_vf(adev)) {  		/* Setup L2 cache */  		tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);  		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); @@ -367,7 +370,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)  {  	u32 tmp; -	if (amdgpu_virt_support_skip_setting(adev)) +	if (amdgpu_sriov_vf(adev))  		return;  	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -407,7 +410,7 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)  void mmhub_v1_0_init(struct amdgpu_device *adev)  { -	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];  	hub->ctx0_ptb_addr_lo32 =  		SOC15_REG_OFFSET(MMHUB, 0, @@ -491,22 +494,6 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad  		WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2);  } -static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, -						   bool enable) -{ -	uint32_t def, data; - -	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - -	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) -		data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; -	else -		data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; - -	if (def != data) -		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} -  static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,  						       bool enable)  { @@ -523,23 +510,6 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade  		WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data);  } -static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, -						  bool enable) -{ -	uint32_t def, data; - -	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - -	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && -	    (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) -		data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; -	else -		data &= 
~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - -	if(def != data) -		WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} -  int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,  			       enum amd_clockgating_state state)  { @@ -551,14 +521,11 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: +	case CHIP_RENOIR:  		mmhub_v1_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false); -		athub_update_medium_grain_clock_gating(adev, -				state == AMD_CG_STATE_GATE ? true : false);  		mmhub_v1_0_update_medium_grain_light_sleep(adev,  				state == AMD_CG_STATE_GATE ? true : false); -		athub_update_medium_grain_light_sleep(adev, -				state == AMD_CG_STATE_GATE ? true : false);  		break;  	default:  		break; @@ -569,18 +536,85 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,  void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)  { -	int data; +	int data, data1;  	if (amdgpu_sriov_vf(adev))  		*flags = 0; +	data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + +	data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); +  	/* AMD_CG_SUPPORT_MC_MGCG */ -	data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); -	if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) +	if ((data & ATC_L2_MISC_CG__ENABLE_MASK) && +	    !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)))  		*flags |= AMD_CG_SUPPORT_MC_MGCG;  	/* AMD_CG_SUPPORT_MC_LS */ -	data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG);  	if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)  		*flags |= AMD_CG_SUPPORT_MC_LS;  } + +static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, +					   void *ras_error_status) +{ +	int i; +	uint32_t ea0_edc_cnt, ea0_edc_cnt2; +	uint32_t ea1_edc_cnt, ea1_edc_cnt2; +	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + +	/* EDC CNT will be cleared automatically after read */ +	ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); +	ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); +	ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); +	ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); + +	/* error count of each error type is recorded by 2 bits, +	 * ce and ue count in EDC_CNT +	 */ +	for (i = 0; i < 5; i++) { +		err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); +		err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); +		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; +		err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); +		err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); +		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; +	} +	/* successive ue count in EDC_CNT */ +	for (i = 0; i < 5; i++) { +		err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); +		err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); +		ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; +	} + +	/* ce and ue count in EDC_CNT2 */ +	for (i = 0; i < 3; i++) { +		err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); +		err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); +		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +		err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); +		err_data->ue_count += 
(ea1_edc_cnt2 & EA_EDC_CNT_MASK); +		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +	} +	/* successive ue count in EDC_CNT2 */ +	for (i = 0; i < 6; i++) { +		err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); +		err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); +		ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +		ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; +	} +} + +const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { +	.query_ras_error_count = mmhub_v1_0_query_ras_error_count, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 0de0fdf98c00..c43319e8f945 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -23,6 +23,8 @@  #ifndef __MMHUB_V1_0_H__  #define __MMHUB_V1_0_H__ +extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; +  u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev);  int mmhub_v1_0_gart_enable(struct amdgpu_device *adev);  void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 0f9549f19ade..3542c203c3c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -126,7 +126,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)  	/* XXX for emulation, Refer to closed source code.*/  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,  			    0); -	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);  	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); @@ -324,7 +324,7 @@ void mmhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)  void mmhub_v2_0_init(struct amdgpu_device *adev)  { -	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];  	hub->ctx0_ptb_addr_lo32 =  		SOC15_REG_OFFSET(MMHUB, 0, @@ -406,6 +406,8 @@ int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		mmhub_v2_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		mmhub_v2_0_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c new file mode 100644 index 000000000000..0cf7ef44b4b5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -0,0 +1,642 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v9_4.h" + +#include "mmhub/mmhub_9_4_1_offset.h" +#include "mmhub/mmhub_9_4_1_sh_mask.h" +#include "mmhub/mmhub_9_4_1_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +#define MMHUB_NUM_INSTANCES			2 +#define MMHUB_INSTANCE_REGISTER_OFFSET		0x3000 + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) +{ +	/* The base should be the same between the 2 mmhubs on Arcturus. Read one here. */ +	u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE); +	u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP); + +	base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; +	base <<= 24; + +	top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; +	top <<= 24; + +	adev->gmc.fb_start = base; +	adev->gmc.fb_end = top; + +	return base; +} + +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, +				uint32_t vmid, uint64_t value) +{ +	/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to +	 * mmVML2VC0_VM_CONTEXT1_* +	 */ +	int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 +			- mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, +			    dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    lower_32_bits(value)); + +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, +			    dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    upper_32_bits(value)); + +} + +static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, +					       int hubid) +{ +	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + +	mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)(adev->gmc.gart_start >> 12)); +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)(adev->gmc.gart_start >> 44)); + +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)(adev->gmc.gart_end >> 12)); +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, +					         int hubid) +{ +	uint64_t value; +	uint32_t tmp; + +	/* Program the AGP BAR */ +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    0); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    adev->gmc.agp_end >> 24); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT, +			    
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    adev->gmc.agp_start >> 24); + +	/* Program the system aperture low logical page number. */ +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + +	/* Set default page address. */ +	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + +		adev->vm_manager.vram_base_offset; +	WREG32_SOC15_OFFSET(MMHUB, 0, +			mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, +			hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			(u32)(value >> 12)); +	WREG32_SOC15_OFFSET(MMHUB, 0, +			mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, +			hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			(u32)(value >> 44)); + +	/* Program "protection fault". */ +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)(adev->dummy_page_addr >> 12)); +	WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, +			    (u32)((u64)adev->dummy_page_addr >> 44)); + +	tmp = RREG32_SOC15_OFFSET(MMHUB, 0, +				  mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, +				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, +			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) +{ +	uint32_t tmp; + +	/* Setup TLB control */ +	tmp = RREG32_SOC15_OFFSET(MMHUB, 0, +			   mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			   hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    ENABLE_L1_TLB, 1); +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    SYSTEM_ACCESS_MODE, 3); +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    ENABLE_ADVANCED_DRIVER_MODEL, 1); +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    ECO_BITS, 0); +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ +	tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    ATC_EN, 1); + +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) +{ +	uint32_t tmp; + +	/* Setup L2 cache */ +	tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, +				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    ENABLE_L2_CACHE, 1); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    ENABLE_L2_FRAGMENT_PROCESSING, 1); +	/* XXX for emulation, Refer to closed source code.*/ +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    PDE_FAULT_CLASSIFICATION, 0); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    CONTEXT1_IDENTITY_ACCESS_MODE, 1); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +			    IDENTITY_MODE_FRAGMENT_SIZE, 0); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, +		     hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + +	tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, +				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, +			    INVALIDATE_ALL_L1_TLBS, 1); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, +			    INVALIDATE_L2_CACHE, 1); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + +	tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + +	tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, +			    VMC_TAP_PDE_REQUEST_PHYSICAL, 0); +	tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, +			    VMC_TAP_PTE_REQUEST_PHYSICAL, 0); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, +					    int hubid) +{ +	uint32_t tmp; + +	tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, +				  hubid * MMHUB_INSTANCE_REGISTER_OFFSET); +	tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); +	tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); +	WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, +						 int hubid) +{ +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); + +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +	WREG32_SOC15_OFFSET(MMHUB, 0, +		    mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, +		    hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +} + +static void 
mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) +{ +	uint32_t tmp; +	int i; + +	for (i = 0; i <= 14; i++) { +		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, +				hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    ENABLE_CONTEXT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    PAGE_TABLE_DEPTH, +				    adev->vm_manager.num_level); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    PAGE_TABLE_BLOCK_SIZE, +				    adev->vm_manager.block_size - 9); +		/* Send no-retry XNACK on fault to suppress VM fault storm. */ +		tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, +				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, +				    hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, +				    tmp); +		WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, +			    hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, +				mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, +				hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, +				lower_32_bits(adev->vm_manager.max_pfn - 1)); +		WREG32_SOC15_OFFSET(MMHUB, 0, +				mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, +				hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, +				upper_32_bits(adev->vm_manager.max_pfn - 1)); +	} +} + +static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, +					    int hubid) +{ +	unsigned i; + +	for (i = 0; i < 18; ++i) { +		WREG32_SOC15_OFFSET(MMHUB, 0, +				mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, +				hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, +				0xffffffff); +		WREG32_SOC15_OFFSET(MMHUB, 0, +				mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, +				hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, +				0x1f); +	} +} + +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { +		if (amdgpu_sriov_vf(adev)) { +			/* +			 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because +			 * they are VF copy registers so vbios post doesn't +			 * program them; for SRIOV the driver needs to program them +			 */ +			WREG32_SOC15_OFFSET(MMHUB, 0, +				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE, +				     i * MMHUB_INSTANCE_REGISTER_OFFSET, +				     adev->gmc.vram_start >> 24); +			WREG32_SOC15_OFFSET(MMHUB, 0, +				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP, +				     i * MMHUB_INSTANCE_REGISTER_OFFSET, +				     adev->gmc.vram_end >> 24); +		} + +		/* GART Enable. 
*/ +		mmhub_v9_4_init_gart_aperture_regs(adev, i); +		mmhub_v9_4_init_system_aperture_regs(adev, i); +		mmhub_v9_4_init_tlb_regs(adev, i); +		mmhub_v9_4_init_cache_regs(adev, i); + +		mmhub_v9_4_enable_system_domain(adev, i); +		mmhub_v9_4_disable_identity_aperture(adev, i); +		mmhub_v9_4_setup_vmid_config(adev, i); +		mmhub_v9_4_program_invalidation(adev, i); +	} + +	return 0; +} + +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) +{ +	u32 tmp; +	u32 i, j; + +	for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { +		/* Disable all tables */ +		for (i = 0; i < 16; i++) +			WREG32_SOC15_OFFSET(MMHUB, 0, +					    mmVML2VC0_VM_CONTEXT0_CNTL, +					    j * MMHUB_INSTANCE_REGISTER_OFFSET + +					    i, 0); + +		/* Setup TLB control */ +		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, +				   mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +				   j * MMHUB_INSTANCE_REGISTER_OFFSET); +		tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +				    ENABLE_L1_TLB, 0); +		tmp = REG_SET_FIELD(tmp, +				    VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +				    ENABLE_ADVANCED_DRIVER_MODEL, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, +				    mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, +				    j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + +		/* Setup L2 cache */ +		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, +					  j * MMHUB_INSTANCE_REGISTER_OFFSET); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, +				    ENABLE_L2_CACHE, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, +				    j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, +				    j * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +	} +} + +/** + * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ +	u32 tmp; +	int i; + +	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { +		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, +					  mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +					  i * MMHUB_INSTANCE_REGISTER_OFFSET); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, +			    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +			    TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, +			    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    NACK_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    value); +		if (!value) 
{ +			tmp = REG_SET_FIELD(tmp, +					    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +					    CRASH_ON_NO_RETRY_FAULT, 1); +			tmp = REG_SET_FIELD(tmp, +					    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +					    CRASH_ON_RETRY_FAULT, 1); +		} + +		WREG32_SOC15_OFFSET(MMHUB, 0, +				    mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL, +				    i * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +	} +} + +void mmhub_v9_4_init(struct amdgpu_device *adev) +{ +	struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = +		{&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; +	int i; + +	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { +		hub[i]->ctx0_ptb_addr_lo32 = +			SOC15_REG_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + +			    i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->ctx0_ptb_addr_hi32 = +			SOC15_REG_OFFSET(MMHUB, 0, +			    mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + +			    i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->vm_inv_eng0_req = +			SOC15_REG_OFFSET(MMHUB, 0, +					 mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + +					 i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->vm_inv_eng0_ack = +			SOC15_REG_OFFSET(MMHUB, 0, +					 mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + +					 i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->vm_context0_cntl = +			SOC15_REG_OFFSET(MMHUB, 0, +					 mmVML2VC0_VM_CONTEXT0_CNTL) + +					 i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->vm_l2_pro_fault_status = +			SOC15_REG_OFFSET(MMHUB, 0, +				    mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + +				    i * MMHUB_INSTANCE_REGISTER_OFFSET; +		hub[i]->vm_l2_pro_fault_cntl = +			SOC15_REG_OFFSET(MMHUB, 0, +				    mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + +				    i * MMHUB_INSTANCE_REGISTER_OFFSET; +	} +} + +static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, +							bool enable) +{ +	uint32_t def, data, def1, data1; +	int i, j; +	int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; + +	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { +		def = data = RREG32_SOC15_OFFSET(MMHUB, 0, +					mmATCL2_0_ATC_L2_MISC_CG, +					i * MMHUB_INSTANCE_REGISTER_OFFSET); + +		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) +			data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; +		else +			data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + +		if (def != data) +			WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, +				i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + +		for (j = 0; j < 5; j++) { +			def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, +					mmDAGB0_CNTL_MISC2, +					i * MMHUB_INSTANCE_REGISTER_OFFSET + +					j * dist); +			if (enable && +			    (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { +				data1 &= +				    ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); +			} else { +				data1 |= +				    (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +				    DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); +			} + +			if (def1 != data1) +				WREG32_SOC15_OFFSET(MMHUB, 0, +					mmDAGB0_CNTL_MISC2, +					i * MMHUB_INSTANCE_REGISTER_OFFSET + +					j * dist, data1); + +			if (i == 1 && j == 3) +				break; +		} +	} +} + +static void mmhub_v9_4_update_medium_grain_light_sleep(struct 
amdgpu_device *adev, +						       bool enable) +{ +	uint32_t def, data; +	int i; + +	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { +		def = data = RREG32_SOC15_OFFSET(MMHUB, 0, +					mmATCL2_0_ATC_L2_MISC_CG, +					i * MMHUB_INSTANCE_REGISTER_OFFSET); + +		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) +			data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; +		else +			data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + +		if (def != data) +			WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, +				i * MMHUB_INSTANCE_REGISTER_OFFSET, data); +	} +} + +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, +			       enum amd_clockgating_state state) +{ +	if (amdgpu_sriov_vf(adev)) +		return 0; + +	switch (adev->asic_type) { +	case CHIP_ARCTURUS: +		mmhub_v9_4_update_medium_grain_clock_gating(adev, +				state == AMD_CG_STATE_GATE ? true : false); +		mmhub_v9_4_update_medium_grain_light_sleep(adev, +				state == AMD_CG_STATE_GATE ? true : false); +		break; +	default: +		break; +	} + +	return 0; +} + +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ +	int data, data1; + +	if (amdgpu_sriov_vf(adev)) +		*flags = 0; + +	/* AMD_CG_SUPPORT_MC_MGCG */ +	data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + +	data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + +	if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) && +	    !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) +		*flags |= AMD_CG_SUPPORT_MC_MGCG; + +	/* AMD_CG_SUPPORT_MC_LS */ +	if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) +		*flags |= AMD_CG_SUPPORT_MC_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h new file mode 100644 index 000000000000..d435cfcec1a8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -0,0 +1,36 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V9_4_H__ +#define __MMHUB_V9_4_H__ + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, +					 bool value); +void mmhub_v9_4_init(struct amdgpu_device *adev); +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, +			       enum amd_clockgating_state state); +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 235548c0b41f..cc5bf595f9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -449,20 +449,6 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)  	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);  } -static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) -{ -	adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; - -	/* Enable L1 security reg access mode by defaul,  as non-security VF -	 * will no longer be supported. -	 */ -	adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; - -	adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; - -	adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; -} -  const struct amdgpu_virt_ops xgpu_ai_virt_ops = {  	.req_full_gpu	= xgpu_ai_request_full_gpu_access,  	.rel_full_gpu	= xgpu_ai_release_full_gpu_access, @@ -471,5 +457,4 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {  	.trans_msg = xgpu_ai_mailbox_trans_msg,  	.get_pp_clk = xgpu_ai_get_pp_clk,  	.force_dpm_level = xgpu_ai_force_dpm_level, -	.init_reg_access_mode = xgpu_ai_init_reg_access_mode,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e963746be11c..9fe08408db58 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -21,7 +21,8 @@   *   */ -#include <drm/drmP.h> +#include <linux/pci.h> +  #include "amdgpu.h"  #include "amdgpu_ih.h" diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c index 55014ce8670a..a56c93620e78 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c @@ -29,20 +29,8 @@  int navi10_reg_base_init(struct amdgpu_device *adev)  { -	int r, i; +	int i; -	if (amdgpu_discovery) { -		r = amdgpu_discovery_reg_base_init(adev); -		if (r) { -			DRM_WARN("failed to init reg base from ip discovery table, " -					"fallback to legacy init method\n"); -			goto legacy_init; -		} - -		return 0; -	} - -legacy_init:  	for (i = 0 ; i < MAX_INSTANCE ; ++i) {  		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));  		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); diff --git a/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c new file mode 100644 index 000000000000..cadc7603ca41 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi12_ip_offset.h" + +int navi12_reg_base_init(struct amdgpu_device *adev) +{ +	/* HW has more IP blocks,  only initialized the blocks needed by driver */ +	uint32_t i; +	for (i = 0 ; i < MAX_INSTANCE ; ++i) { +		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); +		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); +		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); +		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); +		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); +		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); +		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); +		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); +		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); +		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); +		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); +		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); +		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); +	} +	return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c new file mode 100644 index 000000000000..3b5f0f65e096 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi14_ip_offset.h" + +int navi14_reg_base_init(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0 ; i < MAX_INSTANCE ; ++i) { +		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); +		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); +		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); +		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); +		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); +		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); +		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); +		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); +		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); +		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); +		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); +		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); +		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); +	} + +	return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 835d7b1a841f..c05d78d4efc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -92,7 +92,7 @@ static void nbio_v2_3_sdma_doorbell_range(struct amdgpu_device *adev, int instan  }  static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, -					 int doorbell_index) +					 int doorbell_index, int instance)  {  	u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 73419fa38159..74eecb768a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -91,6 +91,26 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan  	WREG32(reg, doorbell_range);  } +static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, +					 int doorbell_index, int instance) +{ +	u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + +	u32 doorbell_range = RREG32(reg); + +	if (use_doorbell) { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, OFFSET, +					       doorbell_index); +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); +	} else +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + +	WREG32(reg, doorbell_range); +} +  static void 
nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,  					       bool enable)  { @@ -282,6 +302,7 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {  	.hdp_flush = nbio_v7_0_hdp_flush,  	.get_memsize = nbio_v7_0_get_memsize,  	.sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, +	.vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range,  	.enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,  	.enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,  	.ih_doorbell_range = nbio_v7_0_ih_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index bfaaa327ae3c..910fffced43b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -31,6 +31,25 @@  #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c +/* + * These are nbio v7_4_1 registers mask. Temporarily define these here since + * nbio v7_4_1 header is incomplete. + */ +#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK	0x00001000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK	0x00002000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK	0x00004000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK	0x00008000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK	0x00010000L +#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK	0x00020000L + +#define mmBIF_MMSCH1_DOORBELL_RANGE                     0x01dc +#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX            2 +//BIF_MMSCH1_DOORBELL_RANGE +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT        0x2 +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT          0x10 +#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK          0x00000FFCL +#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK            0x001F0000L +  static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)  {  	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, @@ -75,10 +94,24 @@ static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)  static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,  			bool use_doorbell, int doorbell_index, int doorbell_size)  { -	u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : -			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); +	u32 reg, doorbell_range; -	u32 doorbell_range = RREG32(reg); +	if (instance < 2) +		reg = instance + +			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); +	else +		/* +		 * These registers address of SDMA2~7 is not consecutive +		 * from SDMA0~1. Need plus 4 dwords offset. 
+		 * +		 *   BIF_SDMA0_DOORBELL_RANGE:  0x3bc0 +		 *   BIF_SDMA1_DOORBELL_RANGE:  0x3bc4 +		 *   BIF_SDMA2_DOORBELL_RANGE:  0x3bd8 +		 */ +		reg = instance + 0x4 + +			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); + +	doorbell_range = RREG32(reg);  	if (use_doorbell) {  		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -89,6 +122,32 @@ static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instan  	WREG32(reg, doorbell_range);  } +static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, +					 int doorbell_index, int instance) +{ +	u32 reg; +	u32 doorbell_range; + +	if (instance) +		reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); +	else +		reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + +	doorbell_range = RREG32(reg); + +	if (use_doorbell) { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, OFFSET, +					       doorbell_index); +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); +	} else +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + +	WREG32(reg, doorbell_range); +} +  static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,  					       bool enable)  { @@ -220,6 +279,12 @@ static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {  	.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,  	.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,  	.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, +	.ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, +	.ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, +	.ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, +	.ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, +	.ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, +	.ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,  };  static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) @@ -261,6 +326,7 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {  	.hdp_flush = nbio_v7_4_hdp_flush,  	.get_memsize = nbio_v7_4_get_memsize,  	.sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, +	.vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range,  	.enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,  	.enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,  	.ih_doorbell_range = nbio_v7_4_ih_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 662612f89c70..85393a99a848 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -23,7 +23,8 @@  #include <linux/firmware.h>  #include <linux/slab.h>  #include <linux/module.h> -#include <drm/drmP.h> +#include <linux/pci.h> +  #include "amdgpu.h"  #include "amdgpu_atombios.h"  #include "amdgpu_ih.h" @@ -289,6 +290,18 @@ static int nv_asic_mode1_reset(struct amdgpu_device *adev)  	return ret;  } + +static enum amd_reset_method +nv_asic_reset_method(struct amdgpu_device *adev) +{ +	struct smu_context *smu = &adev->smu; + +	if (smu_baco_is_support(smu)) +		return AMD_RESET_METHOD_BACO; +	else +		return AMD_RESET_METHOD_MODE1; +} +  static int nv_asic_reset(struct amdgpu_device *adev)  { @@ -303,10 +316,13 @@ static int nv_asic_reset(struct amdgpu_device *adev)  	int ret = 0;  	struct smu_context *smu = &adev->smu; -	if (smu_baco_is_support(smu)) +	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { +		
amdgpu_inc_vram_lost(adev);  		ret = smu_baco_reset(smu); -	else +	} else { +		amdgpu_inc_vram_lost(adev);  		ret = nv_asic_mode1_reset(adev); +	}  	return ret;  } @@ -363,23 +379,55 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =  	.funcs = &nv_common_ip_funcs,  }; -int nv_set_ip_blocks(struct amdgpu_device *adev) +static int nv_reg_base_init(struct amdgpu_device *adev)  { -	/* Set IP register base before any HW register access */ +	int r; + +	if (amdgpu_discovery) { +		r = amdgpu_discovery_reg_base_init(adev); +		if (r) { +			DRM_WARN("failed to init reg base from ip discovery table, " +					"fallback to legacy init method\n"); +			goto legacy_init; +		} + +		return 0; +	} + +legacy_init:  	switch (adev->asic_type) {  	case CHIP_NAVI10:  		navi10_reg_base_init(adev);  		break; +	case CHIP_NAVI14: +		navi14_reg_base_init(adev); +		break; +	case CHIP_NAVI12: +		navi12_reg_base_init(adev); +		break;  	default:  		return -EINVAL;  	} +	return 0; +} + +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ +	int r; + +	/* Set IP register base before any HW register access */ +	r = nv_reg_base_init(adev); +	if (r) +		return r; +  	adev->nbio_funcs = &nbio_v2_3_funcs;  	adev->nbio_funcs->detect_hw_virt(adev);  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14:  		amdgpu_device_ip_block_add(adev, &nv_common_ip_block);  		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block);  		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); @@ -402,6 +450,27 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  		if (adev->enable_mes)  			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);  		break; +	case CHIP_NAVI12: +		amdgpu_device_ip_block_add(adev, &nv_common_ip_block); +		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); +		amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && +		    is_support_sw_smu(adev)) +			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) +		else if (amdgpu_device_has_dc_support(adev)) +			amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif +		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && +		    is_support_sw_smu(adev)) +			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); +		break;  	default:  		return -EINVAL;  	} @@ -496,6 +565,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =  	.read_bios_from_rom = &nv_read_bios_from_rom,  	.read_register = &nv_read_register,  	.reset = &nv_asic_reset, +	.reset_method = &nv_asic_reset_method,  	.set_vga_state = &nv_vga_set_state,  	.get_xclk = &nv_get_xclk,  	.set_uvd_clocks = &nv_set_uvd_clocks, @@ -511,7 +581,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =  static int nv_common_early_init(void *handle)  { -	bool psp_enabled = false;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	adev->smc_rreg = NULL; @@ -528,10 +597,6 @@ static int nv_common_early_init(void *handle)  	adev->asic_funcs = &nv_asic_funcs; -	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && -	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) -		psp_enabled = true; -  	adev->rev_id = nv_get_rev_id(adev);  	
adev->external_rev_id = 0xff;  	switch (adev->asic_type) { @@ -552,10 +617,49 @@ static int nv_common_early_init(void *handle)  			AMD_CG_SUPPORT_BIF_LS;  		adev->pg_flags = AMD_PG_SUPPORT_VCN |  			AMD_PG_SUPPORT_VCN_DPG | -			AMD_PG_SUPPORT_MMHUB |  			AMD_PG_SUPPORT_ATHUB;  		adev->external_rev_id = adev->rev_id + 0x1;  		break; +	case CHIP_NAVI14: +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_IH_CG | +			AMD_CG_SUPPORT_HDP_MGCG | +			AMD_CG_SUPPORT_HDP_LS | +			AMD_CG_SUPPORT_SDMA_MGCG | +			AMD_CG_SUPPORT_SDMA_LS | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS | +			AMD_CG_SUPPORT_ATHUB_MGCG | +			AMD_CG_SUPPORT_ATHUB_LS | +			AMD_CG_SUPPORT_VCN_MGCG | +			AMD_CG_SUPPORT_BIF_MGCG | +			AMD_CG_SUPPORT_BIF_LS; +		adev->pg_flags = AMD_PG_SUPPORT_VCN | +			AMD_PG_SUPPORT_VCN_DPG; +		adev->external_rev_id = adev->rev_id + 20; +		break; +	case CHIP_NAVI12: +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_MGLS | +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_GFX_CP_LS | +			AMD_CG_SUPPORT_GFX_RLC_LS | +			AMD_CG_SUPPORT_IH_CG | +			AMD_CG_SUPPORT_HDP_MGCG | +			AMD_CG_SUPPORT_HDP_LS | +			AMD_CG_SUPPORT_SDMA_MGCG | +			AMD_CG_SUPPORT_SDMA_LS | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS | +			AMD_CG_SUPPORT_ATHUB_MGCG | +			AMD_CG_SUPPORT_ATHUB_LS | +			AMD_CG_SUPPORT_VCN_MGCG; +		adev->pg_flags = AMD_PG_SUPPORT_VCN | +			AMD_PG_SUPPORT_VCN_DPG | +			AMD_PG_SUPPORT_ATHUB; +		adev->external_rev_id = adev->rev_id + 0xa; +		break;  	default:  		/* FIXME: not supported yet */  		return -EINVAL; @@ -748,6 +852,8 @@ static int nv_common_set_clockgating_state(void *handle,  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		adev->nbio_funcs->update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? 
true : false);  		adev->nbio_funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 639c54933cc5..82e6cb432f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -30,4 +30,6 @@ void nv_grbm_select(struct amdgpu_device *adev,  		    u32 me, u32 pipe, u32 queue, u32 vmid);  int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev); +int navi14_reg_base_init(struct amdgpu_device *adev); +int navi12_reg_base_init(struct amdgpu_device *adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 5080a73a95a5..74a9fe8e0cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -233,8 +233,15 @@ enum psp_gfx_fw_type {  	GFX_FW_TYPE_RLCP_CAM                        = 46,   /* RLCP CAM                 NV      */  	GFX_FW_TYPE_RLC_SPP_CAM_EXT                 = 47,   /* RLC SPP CAM EXT          NV      */  	GFX_FW_TYPE_RLX6_DRAM_BOOT                  = 48,   /* RLX6 DRAM BOOT           NV      */ -	GFX_FW_TYPE_VCN0_RAM                        = 49,   /* VCN_RAM  NV */ -	GFX_FW_TYPE_VCN1_RAM                        = 50,   /* VCN_RAM  NV */ +	GFX_FW_TYPE_VCN0_RAM                        = 49,   /* VCN_RAM                  NV + RN */ +	GFX_FW_TYPE_VCN1_RAM                        = 50,   /* VCN_RAM                  NV + RN */ +	GFX_FW_TYPE_DMUB                            = 51,   /* DMUB                          RN */ +	GFX_FW_TYPE_SDMA2                           = 52,   /* SDMA2                    MI      */ +	GFX_FW_TYPE_SDMA3                           = 53,   /* SDMA3                    MI      */ +	GFX_FW_TYPE_SDMA4                           = 54,   /* SDMA4                    MI      */ +	GFX_FW_TYPE_SDMA5                           = 55,   /* SDMA5                    MI      */ +	GFX_FW_TYPE_SDMA6                           = 56,   /* SDMA6                    MI      */ +	GFX_FW_TYPE_SDMA7                           = 57,   /* SDMA7                    MI      */  	GFX_FW_TYPE_MAX  }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index ce1ea31feee0..5d95e614369a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -190,7 +190,6 @@ static int psp_v10_0_ring_destroy(struct psp_context *psp,  }  static int psp_v10_0_cmd_submit(struct psp_context *psp, -				struct amdgpu_firmware_info *ucode,  				uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,  				int index)  { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 41b72588adcf..10166104b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -43,6 +43,12 @@ MODULE_FIRMWARE("amdgpu/vega20_asd.bin");  MODULE_FIRMWARE("amdgpu/vega20_ta.bin");  MODULE_FIRMWARE("amdgpu/navi10_sos.bin");  MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_asd.bin");  /* address block */  #define smnMP1_FIRMWARE_FLAGS		0x3010024 @@ -60,6 +66,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  	int err = 0;  	const struct psp_firmware_header_v1_0 *sos_hdr;  	const struct 
psp_firmware_header_v1_1 *sos_hdr_v1_1; +	const struct psp_firmware_header_v1_2 *sos_hdr_v1_2;  	const struct psp_firmware_header_v1_0 *asd_hdr;  	const struct ta_firmware_header_v1_0 *ta_hdr; @@ -72,6 +79,15 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  	case CHIP_NAVI10:  		chip_name = "navi10";  		break; +	case CHIP_NAVI14: +		chip_name = "navi14"; +		break; +	case CHIP_NAVI12: +		chip_name = "navi12"; +		break; +	case CHIP_ARCTURUS: +		chip_name = "arcturus"; +		break;  	default:  		BUG();  	} @@ -107,6 +123,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  			adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr +  					le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes);  		} +		if (sos_hdr->header.header_version_minor == 2) { +			sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; +			adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); +			adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + +						    le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); +		}  		break;  	default:  		dev_err(adev->dev, @@ -158,6 +180,9 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  		}  		break;  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12: +	case CHIP_ARCTURUS:  		break;  	default:  		BUG(); @@ -473,7 +498,6 @@ static int psp_v11_0_ring_destroy(struct psp_context *psp,  }  static int psp_v11_0_cmd_submit(struct psp_context *psp, -			       struct amdgpu_firmware_info *ucode,  			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,  			       int index)  { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c new file mode 100644 index 000000000000..c72e43f8e0be --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -0,0 +1,565 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v12_0.h" + +#include "mp/mp_12_0_0_offset.h" +#include "mp/mp_12_0_0_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "nbio/nbio_7_4_offset.h" + +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); +/* address block */ +#define smnMP1_FIRMWARE_FLAGS		0x3010024 + +static int psp_v12_0_init_microcode(struct psp_context *psp) +{ +	struct amdgpu_device *adev = psp->adev; +	const char *chip_name; +	char fw_name[30]; +	int err = 0; +	const struct psp_firmware_header_v1_0 *asd_hdr; + +	DRM_DEBUG("\n"); + +	switch (adev->asic_type) { +	case CHIP_RENOIR: +		chip_name = "renoir"; +		break; +	default: +		BUG(); +	} + +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); +	err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); +	if (err) +		goto out1; + +	err = amdgpu_ucode_validate(adev->psp.asd_fw); +	if (err) +		goto out1; + +	asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; +	adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); +	adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version); +	adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); +	adev->psp.asd_start_addr = (uint8_t *)asd_hdr + +				le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); + +	return 0; + +out1: +	release_firmware(adev->psp.asd_fw); +	adev->psp.asd_fw = NULL; + +	return err; +} + +static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) +{ +	int ret; +	uint32_t psp_gfxdrv_command_reg = 0; +	struct amdgpu_device *adev = psp->adev; +	uint32_t sol_reg; + +	/* Check sOS sign of life register to confirm sys driver and sOS +	 * are already been loaded. +	 */ +	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); +	if (sol_reg) { +		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); +		printk("sos fw version = 0x%x.\n", psp->sos_fw_version); +		return 0; +	} + +	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ +	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), +			   0x80000000, 0x80000000, false); +	if (ret) +		return ret; + +	memset(psp->fw_pri_buf, 0, PSP_1_MEG); + +	/* Copy PSP System Driver binary to memory */ +	memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + +	/* Provide the sys driver to bootloader */ +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, +	       (uint32_t)(psp->fw_pri_mc_addr >> 20)); +	psp_gfxdrv_command_reg = 1 << 16; +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, +	       psp_gfxdrv_command_reg); + +	/* there might be handshake issue with hardware which needs delay */ +	mdelay(20); + +	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), +			   0x80000000, 0x80000000, false); + +	return ret; +} + +static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) +{ +	int ret; +	unsigned int psp_gfxdrv_command_reg = 0; +	struct amdgpu_device *adev = psp->adev; +	uint32_t sol_reg; + +	/* Check sOS sign of life register to confirm sys driver and sOS +	 * are already been loaded. 
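	 *
	 * Editorial note (not part of this patch): the sequence below is the
	 * usual PSP bootloader handshake, condensed here for orientation:
	 *
	 *	if (RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81))
	 *		return 0;				/* sOS already alive */
	 *	psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
	 *		     0x80000000, 0x80000000, false);	/* bootloader ready */
	 *	memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
	 *	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
	 *		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
	 *	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, 2 << 16);	/* load sOS */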
+	 */ +	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); +	if (sol_reg) +		return 0; + +	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ +	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), +			   0x80000000, 0x80000000, false); +	if (ret) +		return ret; + +	memset(psp->fw_pri_buf, 0, PSP_1_MEG); + +	/* Copy Secure OS binary to PSP memory */ +	memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + +	/* Provide the PSP secure OS to bootloader */ +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, +	       (uint32_t)(psp->fw_pri_mc_addr >> 20)); +	psp_gfxdrv_command_reg = 2 << 16; +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, +	       psp_gfxdrv_command_reg); + +	/* there might be handshake issue with hardware which needs delay */ +	mdelay(20); +	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), +			   RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), +			   0, true); + +	return ret; +} + +static void psp_v12_0_reroute_ih(struct psp_context *psp) +{ +	struct amdgpu_device *adev = psp->adev; +	uint32_t tmp; + +	/* Change IH ring for VMC */ +	tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); +	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); +	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + +	mdelay(20); +	psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), +		     0x80000000, 0x8000FFFF, false); + +	/* Change IH ring for UMC */ +	tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); +	tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); +	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + +	mdelay(20); +	psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), +		     0x80000000, 0x8000FFFF, false); +} + +static int psp_v12_0_ring_init(struct psp_context *psp, +			      enum psp_ring_type ring_type) +{ +	int ret = 0; +	struct psp_ring *ring; +	struct amdgpu_device *adev = psp->adev; + +	psp_v12_0_reroute_ih(psp); + +	ring = &psp->km_ring; + +	ring->ring_type = ring_type; + +	/* allocate 4k Page of Local Frame Buffer memory for ring */ +	ring->ring_size = 0x1000; +	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, +				      AMDGPU_GEM_DOMAIN_VRAM, +				      &adev->firmware.rbuf, +				      &ring->ring_mem_mc_addr, +				      (void **)&ring->ring_mem); +	if (ret) { +		ring->ring_size = 0; +		return ret; +	} + +	return 0; +} + +static bool psp_v12_0_support_vmr_ring(struct psp_context *psp) +{ +	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) +		return true; +	return false; +} + +static int psp_v12_0_ring_create(struct psp_context *psp, +				enum psp_ring_type ring_type) +{ +	int ret = 0; +	unsigned int psp_ring_reg = 0; +	struct psp_ring *ring = &psp->km_ring; +	struct amdgpu_device *adev = psp->adev; + +	if (psp_v12_0_support_vmr_ring(psp)) { +		/* Write low address of the ring to C2PMSG_102 */ +		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); +		/* Write high address of the ring to C2PMSG_103 */ +		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + +		/* Write the ring 
initialization command to C2PMSG_101 */ +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, +					     GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + +		/* there might be handshake issue with hardware which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_101 */ +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), +				   0x80000000, 0x8000FFFF, false); + +	} else { +		/* Write low address of the ring to C2PMSG_69 */ +		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); +		/* Write high address of the ring to C2PMSG_70 */ +		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); +		/* Write size of ring to C2PMSG_71 */ +		psp_ring_reg = ring->ring_size; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); +		/* Write the ring initialization command to C2PMSG_64 */ +		psp_ring_reg = ring_type; +		psp_ring_reg = psp_ring_reg << 16; +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + +		/* there might be handshake issue with hardware which needs delay */ +		mdelay(20); + +		/* Wait for response flag (bit 31) in C2PMSG_64 */ +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), +				   0x80000000, 0x8000FFFF, false); +	} + +	return ret; +} + +static int psp_v12_0_ring_stop(struct psp_context *psp, +			      enum psp_ring_type ring_type) +{ +	int ret = 0; +	struct amdgpu_device *adev = psp->adev; + +	/* Write the ring destroy command*/ +	if (psp_v12_0_support_vmr_ring(psp)) +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, +				     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); +	else +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, +				     GFX_CTRL_CMD_ID_DESTROY_RINGS); + +	/* there might be handshake issue with hardware which needs delay */ +	mdelay(20); + +	/* Wait for response flag (bit 31) */ +	if (psp_v12_0_support_vmr_ring(psp)) +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), +				   0x80000000, 0x80000000, false); +	else +		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), +				   0x80000000, 0x80000000, false); + +	return ret; +} + +static int psp_v12_0_ring_destroy(struct psp_context *psp, +				 enum psp_ring_type ring_type) +{ +	int ret = 0; +	struct psp_ring *ring = &psp->km_ring; +	struct amdgpu_device *adev = psp->adev; + +	ret = psp_v12_0_ring_stop(psp, ring_type); +	if (ret) +		DRM_ERROR("Fail to stop psp ring\n"); + +	amdgpu_bo_free_kernel(&adev->firmware.rbuf, +			      &ring->ring_mem_mc_addr, +			      (void **)&ring->ring_mem); + +	return ret; +} + +static int psp_v12_0_cmd_submit(struct psp_context *psp, +			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, +			       int index) +{ +	unsigned int psp_write_ptr_reg = 0; +	struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; +	struct psp_ring *ring = &psp->km_ring; +	struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; +	struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + +		ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; +	struct amdgpu_device *adev = psp->adev; +	uint32_t ring_size_dw = ring->ring_size / 4; +	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + +	/* KM (GPCOM) prepare write pointer */ +	if (psp_v12_0_support_vmr_ring(psp)) +		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); +	else +		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + +	/* Update KM RB frame pointer to new frame */ +	/* write_frame 
ptr increments by size of rb_frame in bytes */ +	/* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ +	if ((psp_write_ptr_reg % ring_size_dw) == 0) +		write_frame = ring_buffer_start; +	else +		write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); +	/* Check invalid write_frame ptr address */ +	if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { +		DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", +			  ring_buffer_start, ring_buffer_end, write_frame); +		DRM_ERROR("write_frame is pointing to address out of bounds\n"); +		return -EINVAL; +	} + +	/* Initialize KM RB frame */ +	memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + +	/* Update KM RB frame */ +	write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); +	write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); +	write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); +	write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); +	write_frame->fence_value = index; + +	/* Update the write Pointer in DWORDs */ +	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; +	if (psp_v12_0_support_vmr_ring(psp)) { +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); +	} else +		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + +	return 0; +} + +static int +psp_v12_0_sram_map(struct amdgpu_device *adev, +		  unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, +		  unsigned int *sram_data_reg_offset, +		  enum AMDGPU_UCODE_ID ucode_id) +{ +	int ret = 0; + +	switch (ucode_id) { +/* TODO: needs to confirm */ +#if 0 +	case AMDGPU_UCODE_ID_SMC: +		*sram_offset = 0; +		*sram_addr_reg_offset = 0; +		*sram_data_reg_offset = 0; +		break; +#endif + +	case AMDGPU_UCODE_ID_CP_CE: +		*sram_offset = 0x0; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_CP_PFP: +		*sram_offset = 0x0; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_CP_ME: +		*sram_offset = 0x0; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_CP_MEC1: +		*sram_offset = 0x10000; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_CP_MEC2: +		*sram_offset = 0x10000; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_RLC_G: +		*sram_offset = 0x2000; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); +		break; + +	case AMDGPU_UCODE_ID_SDMA0: +		*sram_offset = 0x0; +		*sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); +		*sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); +		break; + +/* TODO: needs to confirm */ +#if 0 +	case AMDGPU_UCODE_ID_SDMA1: +		*sram_offset = ; +		*sram_addr_reg_offset = ; +		break; + +	case AMDGPU_UCODE_ID_UVD: +		*sram_offset = ; +		
*sram_addr_reg_offset = ; +		break; + +	case AMDGPU_UCODE_ID_VCE: +		*sram_offset = ; +		*sram_addr_reg_offset = ; +		break; +#endif + +	case AMDGPU_UCODE_ID_MAXIMUM: +	default: +		ret = -EINVAL; +		break; +	} + +	return ret; +} + +static bool psp_v12_0_compare_sram_data(struct psp_context *psp, +				       struct amdgpu_firmware_info *ucode, +				       enum AMDGPU_UCODE_ID ucode_type) +{ +	int err = 0; +	unsigned int fw_sram_reg_val = 0; +	unsigned int fw_sram_addr_reg_offset = 0; +	unsigned int fw_sram_data_reg_offset = 0; +	unsigned int ucode_size; +	uint32_t *ucode_mem = NULL; +	struct amdgpu_device *adev = psp->adev; + +	err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, +				&fw_sram_data_reg_offset, ucode_type); +	if (err) +		return false; + +	WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + +	ucode_size = ucode->ucode_size; +	ucode_mem = (uint32_t *)ucode->kaddr; +	while (ucode_size) { +		fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + +		if (*ucode_mem != fw_sram_reg_val) +			return false; + +		ucode_mem++; +		/* 4 bytes */ +		ucode_size -= 4; +	} + +	return true; +} + +static int psp_v12_0_mode1_reset(struct psp_context *psp) +{ +	int ret; +	uint32_t offset; +	struct amdgpu_device *adev = psp->adev; + +	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); + +	ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + +	if (ret) { +		DRM_INFO("psp is not working correctly before mode1 reset!\n"); +		return -EINVAL; +	} + +	/*send the mode 1 reset command*/ +	WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); + +	msleep(500); + +	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); + +	ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + +	if (ret) { +		DRM_INFO("psp mode 1 reset failed!\n"); +		return -EINVAL; +	} + +	DRM_INFO("psp mode1 reset succeed \n"); + +	return 0; +} + +static const struct psp_funcs psp_v12_0_funcs = { +	.init_microcode = psp_v12_0_init_microcode, +	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv, +	.bootloader_load_sos = psp_v12_0_bootloader_load_sos, +	.ring_init = psp_v12_0_ring_init, +	.ring_create = psp_v12_0_ring_create, +	.ring_stop = psp_v12_0_ring_stop, +	.ring_destroy = psp_v12_0_ring_destroy, +	.cmd_submit = psp_v12_0_cmd_submit, +	.compare_sram_data = psp_v12_0_compare_sram_data, +	.mode1_reset = psp_v12_0_mode1_reset, +}; + +void psp_v12_0_set_psp_funcs(struct psp_context *psp) +{ +	psp->funcs = &psp_v12_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h new file mode 100644 index 000000000000..241693ab1990 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V12_0_H__ +#define __PSP_V12_0_H__ + +#include "amdgpu_psp.h" + +void psp_v12_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 019c47feee42..d2c727f6a8bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -411,7 +411,6 @@ static int psp_v3_1_ring_destroy(struct psp_context *psp,  }  static int psp_v3_1_cmd_submit(struct psp_context *psp, -			       struct amdgpu_firmware_info *ucode,  			       uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,  			       int index)  { @@ -636,7 +635,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)  static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)  { -	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) +	if (amdgpu_sriov_vf(psp->adev))  		return true;  	return false; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4428018672d3..78452cf0115d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -34,6 +34,18 @@  #include "sdma0/sdma0_4_2_sh_mask.h"  #include "sdma1/sdma1_4_2_offset.h"  #include "sdma1/sdma1_4_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h"  #include "hdp/hdp_4_0_offset.h"  #include "sdma0/sdma0_4_1_default.h" @@ -55,6 +67,8 @@ MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");  MODULE_FIRMWARE("amdgpu/raven_sdma.bin");  MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");  MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); +MODULE_FIRMWARE("amdgpu/renoir_sdma.bin");  #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L  #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -202,25 +216,132 @@ static const struct soc15_reg_golden golden_settings_sdma_rv2[] =  	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)  }; +static const struct soc15_reg_golden golden_settings_sdma_arct[] = +{ +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, 
mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), +	SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) +}; + +static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), +}; +  static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,  		u32 instance, u32 offset)  { -	return ( 0 == instance ? 
(adev->reg_offset[SDMA0_HWIP][0][0] + offset) : -			(adev->reg_offset[SDMA1_HWIP][0][0] + offset)); +	switch (instance) { +	case 0: +		return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); +	case 1: +		return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); +	case 2: +		return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); +	case 3: +		return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); +	case 4: +		return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); +	case 5: +		return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); +	case 6: +		return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); +	case 7: +		return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); +	default: +		break; +	} +	return 0; +} + +static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) +{ +	switch (seq_num) { +	case 0: +		return SOC15_IH_CLIENTID_SDMA0; +	case 1: +		return SOC15_IH_CLIENTID_SDMA1; +	case 2: +		return SOC15_IH_CLIENTID_SDMA2; +	case 3: +		return SOC15_IH_CLIENTID_SDMA3; +	case 4: +		return SOC15_IH_CLIENTID_SDMA4; +	case 5: +		return SOC15_IH_CLIENTID_SDMA5; +	case 6: +		return SOC15_IH_CLIENTID_SDMA6; +	case 7: +		return SOC15_IH_CLIENTID_SDMA7; +	default: +		break; +	} +	return -EINVAL; +} + +static int sdma_v4_0_irq_id_to_seq(unsigned client_id) +{ +	switch (client_id) { +	case SOC15_IH_CLIENTID_SDMA0: +		return 0; +	case SOC15_IH_CLIENTID_SDMA1: +		return 1; +	case SOC15_IH_CLIENTID_SDMA2: +		return 2; +	case SOC15_IH_CLIENTID_SDMA3: +		return 3; +	case SOC15_IH_CLIENTID_SDMA4: +		return 4; +	case SOC15_IH_CLIENTID_SDMA5: +		return 5; +	case SOC15_IH_CLIENTID_SDMA6: +		return 6; +	case SOC15_IH_CLIENTID_SDMA7: +		return 7; +	default: +		break; +	} +	return -EINVAL;  }  static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->asic_type) {  	case CHIP_VEGA10: -		if (!amdgpu_virt_support_skip_setting(adev)) { -			soc15_program_register_sequence(adev, -							 golden_settings_sdma_4, -							 ARRAY_SIZE(golden_settings_sdma_4)); -			soc15_program_register_sequence(adev, -							 golden_settings_sdma_vg10, -							 ARRAY_SIZE(golden_settings_sdma_vg10)); -		} +		soc15_program_register_sequence(adev, +						golden_settings_sdma_4, +						ARRAY_SIZE(golden_settings_sdma_4)); +		soc15_program_register_sequence(adev, +						golden_settings_sdma_vg10, +						ARRAY_SIZE(golden_settings_sdma_vg10));  		break;  	case CHIP_VEGA12:  		soc15_program_register_sequence(adev, @@ -241,6 +362,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)  						golden_settings_sdma1_4_2,  						ARRAY_SIZE(golden_settings_sdma1_4_2));  		break; +	case CHIP_ARCTURUS: +		soc15_program_register_sequence(adev, +						golden_settings_sdma_arct, +						ARRAY_SIZE(golden_settings_sdma_arct)); +		break;  	case CHIP_RAVEN:  		soc15_program_register_sequence(adev,  						golden_settings_sdma_4_1, @@ -254,11 +380,53 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)  							golden_settings_sdma_rv1,  							ARRAY_SIZE(golden_settings_sdma_rv1));  		break; +	case CHIP_RENOIR: +		soc15_program_register_sequence(adev, +						golden_settings_sdma_4_3, +						ARRAY_SIZE(golden_settings_sdma_4_3)); +		break;  	default:  		break;  	}  } +static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ +	int err = 0; +	const struct sdma_firmware_header_v1_0 *hdr; + +	err = amdgpu_ucode_validate(sdma_inst->fw); +	if (err) +		return err; + +	hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; +	sdma_inst->fw_version = 
le32_to_cpu(hdr->header.ucode_version); +	sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + +	if (sdma_inst->feature_version >= 20) +		sdma_inst->burst_nop = true; + +	return 0; +} + +static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0; i < adev->sdma.num_instances; i++) { +		if (adev->sdma.instance[i].fw != NULL) +			release_firmware(adev->sdma.instance[i].fw); + +		/* arcturus shares the same FW memory across +		   all SDMA isntances */ +		if (adev->asic_type == CHIP_ARCTURUS) +			break; +	} + +	memset((void*)adev->sdma.instance, 0, +		sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} +  /**   * sdma_v4_0_init_microcode - load ucode images from disk   * @@ -278,7 +446,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  	int err = 0, i;  	struct amdgpu_firmware_info *info = NULL;  	const struct common_firmware_header *header = NULL; -	const struct sdma_firmware_header_v1_0 *hdr;  	DRM_DEBUG("\n"); @@ -300,30 +467,52 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  		else  			chip_name = "raven";  		break; +	case CHIP_ARCTURUS: +		chip_name = "arcturus"; +		break; +	case CHIP_RENOIR: +		chip_name = "renoir"; +		break;  	default:  		BUG();  	} -	for (i = 0; i < adev->sdma.num_instances; i++) { -		if (i == 0) -			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); -		else -			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); -		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); -		if (err) -			goto out; -		err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); -		if (err) -			goto out; -		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; -		adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); -		adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); -		if (adev->sdma.instance[i].feature_version >= 20) -			adev->sdma.instance[i].burst_nop = true; -		DRM_DEBUG("psp_load == '%s'\n", -				adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); - -		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + +	err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); +	if (err) +		goto out; + +	err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); +	if (err) +		goto out; + +	for (i = 1; i < adev->sdma.num_instances; i++) { +		if (adev->asic_type == CHIP_ARCTURUS) { +			/* Acturus will leverage the same FW memory +			   for every SDMA instance */ +			memcpy((void*)&adev->sdma.instance[i], +			       (void*)&adev->sdma.instance[0], +			       sizeof(struct amdgpu_sdma_instance)); +		} +		else { +			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + +			err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); +			if (err) +				goto out; + +			err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); +			if (err) +				goto out; +		} +	} + +	DRM_DEBUG("psp_load == '%s'\n", +		adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? 
"true" : "false"); + +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +		for (i = 0; i < adev->sdma.num_instances; i++) {  			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];  			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;  			info->fw = adev->sdma.instance[i].fw; @@ -332,13 +521,11 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);  		}  	} +  out:  	if (err) {  		DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); -		for (i = 0; i < adev->sdma.num_instances; i++) { -			release_firmware(adev->sdma.instance[i].fw); -			adev->sdma.instance[i].fw = NULL; -		} +		sdma_v4_0_destroy_inst_ctx(adev);  	}  	return err;  } @@ -561,10 +748,7 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)  	u32 ref_and_mask = 0;  	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; -	if (ring->me == 0) -		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; -	else -		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; +	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;  	sdma_v4_0_wait_reg_mem(ring, 0, 1,  			       adev->nbio_funcs->get_hdp_flush_done_offset(adev), @@ -620,26 +804,27 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se   */  static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)  { -	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; -	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; +	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];  	u32 rb_cntl, ib_cntl; -	int i; +	int i, unset = 0; + +	for (i = 0; i < adev->sdma.num_instances; i++) { +		sdma[i] = &adev->sdma.instance[i].ring; -	if ((adev->mman.buffer_funcs_ring == sdma0) || -	    (adev->mman.buffer_funcs_ring == sdma1)) +		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {  			amdgpu_ttm_set_buffer_funcs_status(adev, false); +			unset = 1; +		} -	for (i = 0; i < adev->sdma.num_instances; i++) {  		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);  		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);  		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);  		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);  		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);  		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); -	} -	sdma0->sched.ready = false; -	sdma1->sched.ready = false; +		sdma[i]->sched.ready = false; +	}  }  /** @@ -663,16 +848,20 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)   */  static void sdma_v4_0_page_stop(struct amdgpu_device *adev)  { -	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; -	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; +	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];  	u32 rb_cntl, ib_cntl;  	int i; - -	if ((adev->mman.buffer_funcs_ring == sdma0) || -	    (adev->mman.buffer_funcs_ring == sdma1)) -		amdgpu_ttm_set_buffer_funcs_status(adev, false); +	bool unset = false;  	for (i = 0; i < adev->sdma.num_instances; i++) { +		sdma[i] = &adev->sdma.instance[i].page; + +		if ((adev->mman.buffer_funcs_ring == sdma[i]) && +			(unset == false)) { +			amdgpu_ttm_set_buffer_funcs_status(adev, false); +			unset = true; +		} +  		rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);  		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,  					RB_ENABLE, 0); @@ -681,10 +870,9 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)  		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,  					IB_ENABLE, 0);  		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, 
ib_cntl); -	} -	sdma0->sched.ready = false; -	sdma1->sched.ready = false; +		sdma[i]->sched.ready = false; +	}  }  /** @@ -1018,6 +1206,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_RAVEN: +	case CHIP_RENOIR:  		sdma_v4_1_init_power_gating(adev);  		sdma_v4_1_update_power_gating(adev, true);  		break; @@ -1473,8 +1662,10 @@ static int sdma_v4_0_early_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	int r; -	if (adev->asic_type == CHIP_RAVEN) +	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR)  		adev->sdma.num_instances = 1; +	else if (adev->asic_type == CHIP_ARCTURUS) +		adev->sdma.num_instances = 8;  	else  		adev->sdma.num_instances = 2; @@ -1499,6 +1690,7 @@ static int sdma_v4_0_early_init(void *handle)  }  static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, +		struct ras_err_data *err_data,  		struct amdgpu_iv_entry *entry);  static int sdma_v4_0_late_init(void *handle) @@ -1518,7 +1710,7 @@ static int sdma_v4_0_late_init(void *handle)  		.sub_block_index = 0,  		.name = "sdma",  	}; -	int r; +	int r, i;  	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {  		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); @@ -1575,14 +1767,11 @@ static int sdma_v4_0_late_init(void *handle)  	if (r)  		goto sysfs;  resume: -	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); -	if (r) -		goto irq; - -	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); -	if (r) { -		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); -		goto irq; +	for (i = 0; i < adev->sdma.num_instances; i++) { +		r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, +				   AMDGPU_SDMA_IRQ_INSTANCE0 + i); +		if (r) +			goto irq;  	}  	return 0; @@ -1606,28 +1795,22 @@ static int sdma_v4_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, -			      &adev->sdma.trap_irq); -	if (r) -		return r; - -	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, -			      &adev->sdma.trap_irq); -	if (r) -		return r; - -	/* SDMA SRAM ECC event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, -			&adev->sdma.ecc_irq); -	if (r) -		return r; +	for (i = 0; i < adev->sdma.num_instances; i++) { +		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), +				      SDMA0_4_0__SRCID__SDMA_TRAP, +				      &adev->sdma.trap_irq); +		if (r) +			return r; +	}  	/* SDMA SRAM ECC event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, -			&adev->sdma.ecc_irq); -	if (r) -		return r; +	for (i = 0; i < adev->sdma.num_instances; i++) { +		r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), +				      SDMA0_4_0__SRCID__SDMA_SRAM_ECC, +				      &adev->sdma.ecc_irq); +		if (r) +			return r; +	}  	for (i = 0; i < adev->sdma.num_instances; i++) {  		ring = &adev->sdma.instance[i].ring; @@ -1641,11 +1824,8 @@ static int sdma_v4_0_sw_init(void *handle)  		ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;  		sprintf(ring->name, "sdma%d", i); -		r = amdgpu_ring_init(adev, ring, 1024, -				     &adev->sdma.trap_irq, -				     (i == 0) ? 
-				     AMDGPU_SDMA_IRQ_INSTANCE0 : -				     AMDGPU_SDMA_IRQ_INSTANCE1); +		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, +				     AMDGPU_SDMA_IRQ_INSTANCE0 + i);  		if (r)  			return r; @@ -1663,9 +1843,7 @@ static int sdma_v4_0_sw_init(void *handle)  			sprintf(ring->name, "page%d", i);  			r = amdgpu_ring_init(adev, ring, 1024,  					     &adev->sdma.trap_irq, -					     (i == 0) ? -					     AMDGPU_SDMA_IRQ_INSTANCE0 : -					     AMDGPU_SDMA_IRQ_INSTANCE1); +					     AMDGPU_SDMA_IRQ_INSTANCE0 + i);  			if (r)  				return r;  		} @@ -1701,10 +1879,7 @@ static int sdma_v4_0_sw_fini(void *handle)  			amdgpu_ring_fini(&adev->sdma.instance[i].page);  	} -	for (i = 0; i < adev->sdma.num_instances; i++) { -		release_firmware(adev->sdma.instance[i].fw); -		adev->sdma.instance[i].fw = NULL; -	} +	sdma_v4_0_destroy_inst_ctx(adev);  	return 0;  } @@ -1714,11 +1889,13 @@ static int sdma_v4_0_hw_init(void *handle)  	int r;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && -			adev->powerplay.pp_funcs->set_powergating_by_smu) +	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && +			adev->powerplay.pp_funcs->set_powergating_by_smu) || +			adev->asic_type == CHIP_RENOIR)  		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); -	sdma_v4_0_init_golden_registers(adev); +	if (!amdgpu_sriov_vf(adev)) +		sdma_v4_0_init_golden_registers(adev);  	r = sdma_v4_0_start(adev); @@ -1728,18 +1905,22 @@ static int sdma_v4_0_hw_init(void *handle)  static int sdma_v4_0_hw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	int i;  	if (amdgpu_sriov_vf(adev))  		return 0; -	amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); -	amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); +	for (i = 0; i < adev->sdma.num_instances; i++) { +		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, +			       AMDGPU_SDMA_IRQ_INSTANCE0 + i); +	}  	sdma_v4_0_ctx_switch_enable(adev, false);  	sdma_v4_0_enable(adev, false); -	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs -			&& adev->powerplay.pp_funcs->set_powergating_by_smu) +	if ((adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs +			&& adev->powerplay.pp_funcs->set_powergating_by_smu) || +			adev->asic_type == CHIP_RENOIR)  		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);  	return 0; @@ -1776,15 +1957,17 @@ static bool sdma_v4_0_is_idle(void *handle)  static int sdma_v4_0_wait_for_idle(void *handle)  { -	unsigned i; -	u32 sdma0, sdma1; +	unsigned i, j; +	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	for (i = 0; i < adev->usec_timeout; i++) { -		sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); -		sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); - -		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) +		for (j = 0; j < adev->sdma.num_instances; j++) { +			sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); +			if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) +				break; +		} +		if (j == adev->sdma.num_instances)  			return 0;  		udelay(1);  	} @@ -1820,17 +2003,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,  	uint32_t instance;  	DRM_DEBUG("IH: SDMA trap\n"); -	switch (entry->client_id) { -	case SOC15_IH_CLIENTID_SDMA0: -		instance = 0; -		break; -	case SOC15_IH_CLIENTID_SDMA1: -		instance = 1; -		break; -	default: -		return 0; -	} - +	instance = sdma_v4_0_irq_id_to_seq(entry->client_id);  	
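	/*
	 * Editorial sketch, not part of the original patch: unlike the RAS and
	 * illegal-instruction handlers further down, this trap path does not
	 * check sdma_v4_0_irq_id_to_seq() for a -EINVAL return before indexing
	 * adev->sdma.instance[].  Since 'instance' is declared uint32_t here,
	 * a minimal guard would need a signed comparison, e.g.:
	 *
	 *	if ((int)instance < 0)
	 *		return 0;
	 */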
switch (entry->ring_id) {  	case 0:  		amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -1851,20 +2024,15 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,  }  static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, +		struct ras_err_data *err_data,  		struct amdgpu_iv_entry *entry)  { -	uint32_t instance, err_source; +	uint32_t err_source; +	int instance; -	switch (entry->client_id) { -	case SOC15_IH_CLIENTID_SDMA0: -		instance = 0; -		break; -	case SOC15_IH_CLIENTID_SDMA1: -		instance = 1; -		break; -	default: +	instance = sdma_v4_0_irq_id_to_seq(entry->client_id); +	if (instance < 0)  		return 0; -	}  	switch (entry->src_id) {  	case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: @@ -1881,7 +2049,7 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,  	amdgpu_ras_reset_gpu(adev, 0); -	return AMDGPU_RAS_UE; +	return AMDGPU_RAS_SUCCESS;  }  static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, @@ -1910,16 +2078,9 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,  	DRM_ERROR("Illegal instruction in SDMA command stream\n"); -	switch (entry->client_id) { -	case SOC15_IH_CLIENTID_SDMA0: -		instance = 0; -		break; -	case SOC15_IH_CLIENTID_SDMA1: -		instance = 1; -		break; -	default: +	instance = sdma_v4_0_irq_id_to_seq(entry->client_id); +	if (instance < 0)  		return 0; -	}  	switch (entry->ring_id) {  	case 0: @@ -1936,14 +2097,10 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,  {  	u32 sdma_edc_config; -	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? -		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : -		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); - -	sdma_edc_config = RREG32(reg_offset); +	sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG);  	sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,  		       state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); -	WREG32(reg_offset, sdma_edc_config); +	WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config);  	return 0;  } @@ -1953,61 +2110,35 @@ static void sdma_v4_0_update_medium_grain_clock_gating(  		bool enable)  {  	uint32_t data, def; +	int i;  	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { -		/* enable sdma0 clock gating */ -		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); -		data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | -			  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); -		if (def != data) -			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - -		if (adev->sdma.num_instances > 1) { -			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); -			data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | -				  SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); +		for (i = 0; i < adev->sdma.num_instances; i++) { +			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); +			data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | +				  SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);  			if (def != data) -				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); +				WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data);  		}  	} else { -		/* disable sdma0 clock gating */ -		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); -		data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | -			 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - -		if (def != data) -			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - -		if (adev->sdma.num_instances > 1) { -			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); -			data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | -				 SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); +		for (i = 0; i < adev->sdma.num_instances; i++) { +			def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); +			data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | +				 SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK);  			if (def != data) -				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); +				WREG32_SDMA(i, 
mmSDMA0_CLK_CTRL, data);  		}  	}  } @@ -2018,34 +2149,23 @@ static void sdma_v4_0_update_medium_grain_light_sleep(  		bool enable)  {  	uint32_t data, def; +	int i;  	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { -		/* 1-not override: enable sdma0 mem light sleep */ -		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); -		data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; -		if (def != data) -			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - -		/* 1-not override: enable sdma1 mem light sleep */ -		if (adev->sdma.num_instances > 1) { -			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); -			data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; +		for (i = 0; i < adev->sdma.num_instances; i++) { +			/* 1-not override: enable sdma mem light sleep */ +			def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); +			data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;  			if (def != data) -				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); +				WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);  		}  	} else { -		/* 0-override:disable sdma0 mem light sleep */ -		def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); -		data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; -		if (def != data) -			WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - -		/* 0-override:disable sdma1 mem light sleep */ -		if (adev->sdma.num_instances > 1) { -			def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); -			data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; +		for (i = 0; i < adev->sdma.num_instances; i++) { +		/* 0-override:disable sdma mem light sleep */ +			def = data = RREG32_SDMA(0, mmSDMA0_POWER_CNTL); +			data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;  			if (def != data) -				WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); +				WREG32_SDMA(0, mmSDMA0_POWER_CNTL, data);  		}  	}  } @@ -2063,6 +2183,8 @@ static int sdma_v4_0_set_clockgating_state(void *handle,  	case CHIP_VEGA12:  	case CHIP_VEGA20:  	case CHIP_RAVEN: +	case CHIP_ARCTURUS: +	case CHIP_RENOIR:  		sdma_v4_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? 
true : false);  		sdma_v4_0_update_medium_grain_light_sleep(adev, @@ -2133,7 +2255,43 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {  	.align_mask = 0xf,  	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0, +	.get_rptr = sdma_v4_0_ring_get_rptr, +	.get_wptr = sdma_v4_0_ring_get_wptr, +	.set_wptr = sdma_v4_0_ring_set_wptr, +	.emit_frame_size = +		6 + /* sdma_v4_0_ring_emit_hdp_flush */ +		3 + /* hdp invalidate */ +		6 + /* sdma_v4_0_ring_emit_pipeline_sync */ +		/* sdma_v4_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + +		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ +	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ +	.emit_ib = sdma_v4_0_ring_emit_ib, +	.emit_fence = sdma_v4_0_ring_emit_fence, +	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, +	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, +	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, +	.test_ring = sdma_v4_0_ring_test_ring, +	.test_ib = sdma_v4_0_ring_test_ib, +	.insert_nop = sdma_v4_0_ring_insert_nop, +	.pad_ib = sdma_v4_0_ring_pad_ib, +	.emit_wreg = sdma_v4_0_ring_emit_wreg, +	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/* + * On Arcturus, SDMA instance 5~7 has a different vmhub type(AMDGPU_MMHUB_1). + * So create a individual constant ring_funcs for those instances. + */ +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { +	.type = AMDGPU_RING_TYPE_SDMA, +	.align_mask = 0xf, +	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), +	.support_64bit_ptrs = true, +	.vmhub = AMDGPU_MMHUB_1,  	.get_rptr = sdma_v4_0_ring_get_rptr,  	.get_wptr = sdma_v4_0_ring_get_wptr,  	.set_wptr = sdma_v4_0_ring_set_wptr, @@ -2165,7 +2323,39 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {  	.align_mask = 0xf,  	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0, +	.get_rptr = sdma_v4_0_ring_get_rptr, +	.get_wptr = sdma_v4_0_page_ring_get_wptr, +	.set_wptr = sdma_v4_0_page_ring_set_wptr, +	.emit_frame_size = +		6 + /* sdma_v4_0_ring_emit_hdp_flush */ +		3 + /* hdp invalidate */ +		6 + /* sdma_v4_0_ring_emit_pipeline_sync */ +		/* sdma_v4_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + +		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ +	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ +	.emit_ib = sdma_v4_0_ring_emit_ib, +	.emit_fence = sdma_v4_0_ring_emit_fence, +	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, +	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, +	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, +	.test_ring = sdma_v4_0_ring_test_ring, +	.test_ib = sdma_v4_0_ring_test_ib, +	.insert_nop = sdma_v4_0_ring_insert_nop, +	.pad_ib = sdma_v4_0_ring_pad_ib, +	.emit_wreg = sdma_v4_0_ring_emit_wreg, +	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { +	.type = AMDGPU_RING_TYPE_SDMA, +	.align_mask = 0xf, +	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), +	.support_64bit_ptrs = true, +	.vmhub = AMDGPU_MMHUB_1,  	.get_rptr = sdma_v4_0_ring_get_rptr,  	.get_wptr = sdma_v4_0_page_ring_get_wptr,  	.set_wptr = 
sdma_v4_0_page_ring_set_wptr, @@ -2197,10 +2387,20 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->sdma.num_instances; i++) { -		adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; +		if (adev->asic_type == CHIP_ARCTURUS && i >= 5) +			adev->sdma.instance[i].ring.funcs = +					&sdma_v4_0_ring_funcs_2nd_mmhub; +		else +			adev->sdma.instance[i].ring.funcs = +					&sdma_v4_0_ring_funcs;  		adev->sdma.instance[i].ring.me = i;  		if (adev->sdma.has_page_queue) { -			adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; +			if (adev->asic_type == CHIP_ARCTURUS && i >= 5) +				adev->sdma.instance[i].page.funcs = +					&sdma_v4_0_page_ring_funcs_2nd_mmhub; +			else +				adev->sdma.instance[i].page.funcs = +					&sdma_v4_0_page_ring_funcs;  			adev->sdma.instance[i].page.me = i;  		}  	} @@ -2224,10 +2424,23 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {  static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)  { -	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; +	switch (adev->sdma.num_instances) { +	case 1: +		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; +		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; +		break; +	case 8: +		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; +		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; +		break; +	case 2: +	default: +		adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; +		adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; +		break; +	}  	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;  	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; -	adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;  	adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;  } @@ -2293,8 +2506,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {  static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)  {  	adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; -	if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) -		adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page; +	if (adev->sdma.has_page_queue) +		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;  	else  		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;  } @@ -2313,22 +2526,15 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)  	unsigned i;  	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; -	if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) { -		for (i = 1; i < adev->sdma.num_instances; i++) { +	for (i = 0; i < adev->sdma.num_instances; i++) { +		if (adev->sdma.has_page_queue)  			sched = &adev->sdma.instance[i].page.sched; -			adev->vm_manager.vm_pte_rqs[i - 1] = -				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; -		} -		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1; -		adev->vm_manager.page_fault = &adev->sdma.instance[0].page; -	} else { -		for (i = 0; i < adev->sdma.num_instances; i++) { +		else  			sched = &adev->sdma.instance[i].ring.sched; -			adev->vm_manager.vm_pte_rqs[i] = -				&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; -		} -		adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; +		adev->vm_manager.vm_pte_rqs[i] = +			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];  	} +	adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;  }  const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 
3747c3f1f0cc..fa2f70ce2e2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -21,8 +21,11 @@   *   */ +#include <linux/delay.h>  #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> +  #include "amdgpu.h"  #include "amdgpu_ucode.h"  #include "amdgpu_trace.h" @@ -42,6 +45,12 @@  MODULE_FIRMWARE("amdgpu/navi10_sdma.bin");  MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); +  #define SDMA1_REG_OFFSET 0x600  #define SDMA0_HYP_DEC_REG_START 0x5880  #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -59,7 +68,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), -	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -71,7 +80,7 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), -	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -80,6 +89,18 @@ static const struct soc15_reg_golden golden_settings_sdma_5[] = {  };  static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),  };  static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) @@ -111,6 +132,22 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev)  						golden_settings_sdma_nv10,  						(const u32)ARRAY_SIZE(golden_settings_sdma_nv10));  		break; +	case CHIP_NAVI14: +		soc15_program_register_sequence(adev, 
+						golden_settings_sdma_5, +						(const u32)ARRAY_SIZE(golden_settings_sdma_5)); +		soc15_program_register_sequence(adev, +						golden_settings_sdma_nv14, +						(const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); +		break; +	case CHIP_NAVI12: +		soc15_program_register_sequence(adev, +						golden_settings_sdma_5, +						(const u32)ARRAY_SIZE(golden_settings_sdma_5)); +		soc15_program_register_sequence(adev, +						golden_settings_sdma_nv12, +						(const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); +		break;  	default:  		break;  	} @@ -143,6 +180,12 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)  	case CHIP_NAVI10:  		chip_name = "navi10";  		break; +	case CHIP_NAVI14: +		chip_name = "navi14"; +		break; +	case CHIP_NAVI12: +		chip_name = "navi12"; +		break;  	default:  		BUG();  	} @@ -861,7 +904,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)  		if (amdgpu_emu_mode == 1)  			msleep(1);  		else -			DRM_UDELAY(1); +			udelay(1);  	}  	if (i < adev->usec_timeout) { @@ -1316,7 +1359,7 @@ static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring)  		if (ring->trail_seq ==  		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))  			break; -		DRM_UDELAY(1); +		udelay(1);  	}  	if (i >= adev->usec_timeout) { @@ -1472,6 +1515,8 @@ static int sdma_v5_0_set_clockgating_state(void *handle,  	switch (adev->asic_type) {  	case CHIP_NAVI10: +	case CHIP_NAVI14: +	case CHIP_NAVI12:  		sdma_v5_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		sdma_v5_0_update_medium_grain_light_sleep(adev, @@ -1532,7 +1577,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {  	.align_mask = 0xf,  	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),  	.support_64bit_ptrs = true, -	.vmhub = AMDGPU_GFXHUB, +	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = sdma_v5_0_ring_get_rptr,  	.get_wptr = sdma_v5_0_ring_get_wptr,  	.set_wptr = sdma_v5_0_ring_set_wptr, @@ -1583,7 +1628,8 @@ static const struct amdgpu_irq_src_funcs sdma_v5_0_illegal_inst_irq_funcs = {  static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)  { -	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; +	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + +					adev->sdma.num_instances;  	adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs;  	adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs;  } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4d74453f3cfb..493af42152f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1186,6 +1186,12 @@ static int si_asic_reset(struct amdgpu_device *adev)  	return 0;  } +static enum amd_reset_method +si_asic_reset_method(struct amdgpu_device *adev) +{ +	return AMD_RESET_METHOD_LEGACY; +} +  static u32 si_get_config_memsize(struct amdgpu_device *adev)  {  	return RREG32(mmCONFIG_MEMSIZE); @@ -1394,6 +1400,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =  	.read_bios_from_rom = &si_read_bios_from_rom,  	.read_register = &si_read_register,  	.reset = &si_asic_reset, +	.reset_method = &si_asic_reset_method,  	.set_vga_state = &si_vga_set_state,  	.get_xclk = &si_get_xclk,  	.set_uvd_clocks = &si_set_uvd_clocks, @@ -1881,7 +1888,7 @@ static void si_program_aspm(struct amdgpu_device *adev)  			if (orig != data)  				si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); -			if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) { +			if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) {  				orig = data 
= si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0);  				data &= ~PLL_RAMP_UP_TIME_0_MASK;  				if (orig != data) @@ -1930,14 +1937,14 @@ static void si_program_aspm(struct amdgpu_device *adev)  			orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL);  			data &= ~LS2_EXIT_TIME_MASK; -			if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) +			if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))  				data |= LS2_EXIT_TIME(5);  			if (orig != data)  				si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data);  			orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL);  			data &= ~LS2_EXIT_TIME_MASK; -			if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) +			if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN))  				data |= LS2_EXIT_TIME(5);  			if (orig != data)  				si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c new file mode 100644 index 000000000000..c44723c267c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -0,0 +1,732 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +#include "smu_v11_0_i2c.h" +#include "amdgpu.h" +#include "soc15_common.h" +#include <drm/drm_fixed.h> +#include <drm/drm_drv.h> +#include "amdgpu_amdkfd.h" +#include <linux/i2c.h> +#include <linux/pci.h> +#include "amdgpu_ras.h" + +/* error codes */ +#define I2C_OK                0 +#define I2C_NAK_7B_ADDR_NOACK 1 +#define I2C_NAK_TXDATA_NOACK  2 +#define I2C_TIMEOUT           4 +#define I2C_SW_TIMEOUT        8 +#define I2C_ABORT             0x10 + +/* I2C transaction flags */ +#define I2C_NO_STOP	1 +#define I2C_RESTART	2 + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev +#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor) + +static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); + +	reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 
1 : 0); +	WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); +} + + +static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); +} + +static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	/* do */ +	{ +		RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR); + +	} /* while (reg_CKSVII2C_ic_clr_intr == 0) */ +} + +static void smu_v11_0_i2c_configure(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t reg = 0; + +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1); +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); +	/* Standard mode */ +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); + +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); +} + +static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	/* +	 * Standard mode speed. These values are taken from SMUIO MAS, +	 * but are different from what is given in the +	 * Synopsys spec. The values here are based on the assumption +	 * that refclock is 100MHz +	 * +	 * Configuration for standard mode; Speed = 100kbps +	 * Scale linearly, for now only support standard speed clock +	 * This will work only with 100M ref clock +	 * +	 * TBD: Change the calculation to take into account ref clock values also. +	 */ + +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2); +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120); +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130); +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); +} + +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	/* Convert from 8-bit to 7-bit address */ +	address >>= 1; +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); +} + +static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t ret = I2C_OK; +	uint32_t reg, reg_c_tx_abrt_source; + +	/* Check if transmission is completed */ +	unsigned long  timeout_counter = jiffies + msecs_to_jiffies(20); + +	do { +		if (time_after(jiffies, timeout_counter)) { +			ret |= I2C_SW_TIMEOUT; +			break; +		} + +		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + +	} while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0); + +	if (ret != I2C_OK) +		return ret; + +	/* This only checks if NAK is received and transaction got aborted */ +	reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT); + +	if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) { +		reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); +		DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source); + +		/* Check for stop due to NACK */ +		if (REG_GET_FIELD(reg_c_tx_abrt_source, +				  CKSVII2C_IC_TX_ABRT_SOURCE, +				  ABRT_TXDATA_NOACK) == 1) { + +			ret |= I2C_NAK_TXDATA_NOACK; + +		} else if (REG_GET_FIELD(reg_c_tx_abrt_source, +					 CKSVII2C_IC_TX_ABRT_SOURCE, +					 ABRT_7B_ADDR_NOACK) == 1) { + +			ret |= I2C_NAK_7B_ADDR_NOACK; +		} else { +			ret |= I2C_ABORT; +		
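/*
 * A minimal usage sketch (illustrative only, not part of this patch): the
 * error codes defined at the top of this file are distinct bits, so a caller
 * can test the returned mask to tell an address NACK from a data NACK, the
 * same way smu_v11_0_i2c_transmit() does further below:
 *
 *	uint32_t err = smu_v11_0_i2c_poll_tx_status(control);
 *
 *	if (err & I2C_NAK_7B_ADDR_NOACK)
 *		;	// no device acknowledged the 7-bit address
 *	else if (err & I2C_NAK_TXDATA_NOACK)
 *		;	// the device NAK'd a data byte mid-transfer
 *	else if (err & I2C_SW_TIMEOUT)
 *		;	// the TX FIFO never drained within 20 ms
 */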
} + +		smu_v11_0_i2c_clear_status(control); +	} + +	return ret; +} + +static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t ret = I2C_OK; +	uint32_t reg_ic_status, reg_c_tx_abrt_source; + +	reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + +	/* If slave is not present */ +	if (REG_GET_FIELD(reg_c_tx_abrt_source, +			  CKSVII2C_IC_TX_ABRT_SOURCE, +			  ABRT_7B_ADDR_NOACK) == 1) { +		ret |= I2C_NAK_7B_ADDR_NOACK; + +		smu_v11_0_i2c_clear_status(control); +	} else {  /* wait till some data is there in RXFIFO */ +		/* Poll for some byte in RXFIFO */ +		unsigned long  timeout_counter = jiffies + msecs_to_jiffies(20); + +		do { +			if (time_after(jiffies, timeout_counter)) { +				ret |= I2C_SW_TIMEOUT; +				break; +			} + +			reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + +		} while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0); +	} + +	return ret; +} + + + + +/** + * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. + * + * @address: The I2C address of the slave device. + * @data: The data to transmit over the bus. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, +				  uint8_t address, uint8_t *data, +				  uint32_t numbytes, uint32_t i2c_flag) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t bytes_sent, reg, ret = 0; +	unsigned long  timeout_counter; + +	bytes_sent = 0; + +	DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d , data: ", +		 (uint16_t)address, numbytes); + +	if (drm_debug & DRM_UT_DRIVER) { +		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, +			       16, 1, data, numbytes, false); +	} + +	/* Set the I2C slave address */ +	smu_v11_0_i2c_set_address(control, address); +	/* Enable I2C */ +	smu_v11_0_i2c_enable(control, true); + +	/* Clear status bits */ +	smu_v11_0_i2c_clear_status(control); + + +	timeout_counter = jiffies + msecs_to_jiffies(20); + +	while (numbytes > 0) { +		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); +		if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { +			do { +				reg = 0; +				/* +				 * Prepare transaction, no need to set RESTART. I2C engine will send +				 * START as soon as it sees data in TXFIFO +				 */ +				if (bytes_sent == 0) +					reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, +							    (i2c_flag & I2C_RESTART) ? 1 : 0); +				reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); + +				/* determine if we need to send STOP bit or not */ +				if (numbytes == 1) +					/* Final transaction, so send stop unless I2C_NO_STOP */ +					reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, +							    (i2c_flag & I2C_NO_STOP) ? 0 : 1); +				/* Write */ +				reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); +				WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + +				/* Record that the bytes were transmitted */ +				bytes_sent++; +				numbytes--; + +				reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + +			} while (numbytes &&  REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); +		} + +		/* +		 * We waited too long for the transmission FIFO to become not-full. +		 * Exit the loop with error. 
+		 */ +		if (time_after(jiffies, timeout_counter)) { +			ret |= I2C_SW_TIMEOUT; +			goto Err; +		} +	} + +	ret = smu_v11_0_i2c_poll_tx_status(control); + +Err: +	/* Any error, no point in proceeding */ +	if (ret != I2C_OK) { +		if (ret & I2C_SW_TIMEOUT) +			DRM_ERROR("TIMEOUT ERROR !!!"); + +		if (ret & I2C_NAK_7B_ADDR_NOACK) +			DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + +		if (ret & I2C_NAK_TXDATA_NOACK) +			DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); +	} + +	return ret; +} + + +/** + * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. + * + * @address: The I2C address of the slave device. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, +				 uint8_t address, uint8_t *data, +				 uint32_t numbytes, uint8_t i2c_flag) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t bytes_received, ret = I2C_OK; + +	bytes_received = 0; + +	/* Set the I2C slave address */ +	smu_v11_0_i2c_set_address(control, address); + +	/* Enable I2C */ +	smu_v11_0_i2c_enable(control, true); + +	while (numbytes > 0) { +		uint32_t reg = 0; + +		smu_v11_0_i2c_clear_status(control); + + +		/* Prepare transaction */ + +		/* Each time we disable I2C, so this is not a restart */ +		if (bytes_received == 0) +			reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, +					    (i2c_flag & I2C_RESTART) ? 1 : 0); + +		reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); +		/* Read */ +		reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); + +		/* Transmitting last byte */ +		if (numbytes == 1) +			/* Final transaction, so send stop if requested */ +			reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, +					    (i2c_flag & I2C_NO_STOP) ? 
0 : 1); + +		WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + +		ret = smu_v11_0_i2c_poll_rx_status(control); + +		/* Any error, no point in proceeding */ +		if (ret != I2C_OK) { +			if (ret & I2C_SW_TIMEOUT) +				DRM_ERROR("TIMEOUT ERROR !!!"); + +			if (ret & I2C_NAK_7B_ADDR_NOACK) +				DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + +			if (ret & I2C_NAK_TXDATA_NOACK) +				DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + +			break; +		} + +		reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD); +		data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT); + +		/* Record that the bytes were received */ +		bytes_received++; +		numbytes--; +	} + +	DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data :", +		  (uint16_t)address, bytes_received); + +	if (drm_debug & DRM_UT_DRIVER) { +		print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, +			       16, 1, data, bytes_received, false); +	} + +	return ret; +} + +static void smu_v11_0_i2c_abort(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t reg = 0; + +	/* Enable I2C engine; */ +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1); +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + +	/* Abort previous transaction */ +	reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1); +	WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + +	DRM_DEBUG_DRIVER("I2C_Abort() Done."); +} + + +static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	const uint32_t IDLE_TIMEOUT = 1024; +	uint32_t timeout_count = 0; +	uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity; + +	reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); +	reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + +	if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && +	    (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { +		/* +		 * Nobody is using I2C engine, but engine remains active because +		 * someone missed to send STOP +		 */ +		smu_v11_0_i2c_abort(control); +	} else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) { +		/* Nobody is using I2C engine */ +		return true; +	} + +	/* Keep reading activity bit until it's cleared */ +	do { +		reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY); + +		if (REG_GET_FIELD(reg_ic_clr_activity, +		    CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0) +			return true; + +		++timeout_count; + +	} while (timeout_count < IDLE_TIMEOUT); + +	return false; +} + +static void smu_v11_0_i2c_init(struct i2c_adapter *control) +{ +	/* Disable clock gating */ +	smu_v11_0_i2c_set_clock_gating(control, false); + +	if (!smu_v11_0_i2c_activity_done(control)) +		DRM_WARN("I2C busy !"); + +	/* Disable I2C */ +	smu_v11_0_i2c_enable(control, false); + +	/* Configure I2C to operate as master and in standard mode */ +	smu_v11_0_i2c_configure(control); + +	/* Initialize the clock to 50 kHz default */ +	smu_v11_0_i2c_set_clock(control); + +} + +static void smu_v11_0_i2c_fini(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	uint32_t reg_ic_enable_status, reg_ic_enable; + +	smu_v11_0_i2c_enable(control, false); + +	/* Double check if disabled, else force abort */ +	reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); +	reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + +	if ((REG_GET_FIELD(reg_ic_enable, 
CKSVII2C_IC_ENABLE, ENABLE) == 0) && +	    (REG_GET_FIELD(reg_ic_enable_status, +			   CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { +		/* +		 * Nobody is using I2C engine, but engine remains active because +		 * someone missed to send STOP +		 */ +		smu_v11_0_i2c_abort(control); +	} + +	/* Restore clock gating */ + +	/* +	 * TODO Reenabling clock gating seems to break subsequent SMU operation +	 *      on the I2C bus. My guess is that SMU doesn't disable clock gating like +	 *      we do here before working with the bus. So for now just don't restore +	 *      it but later work with SMU to see if they have this issue and can +	 *      update their code appropriately +	 */ +	/* smu_v11_0_i2c_set_clock_gating(control, true); */ + +} + +static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	/* Send  PPSMC_MSG_RequestI2CBus */ +	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) +		goto Fail; + + +	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true)) +		return true; + +Fail: +	return false; +} + +static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); + +	/* Send  PPSMC_MSG_RequestI2CBus */ +	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) +		goto Fail; + +	/* Send  PPSMC_MSG_ReleaseI2CBus */ +	if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, +							     false)) +		return true; + +Fail: +	return false; +} + +/***************************** EEPROM I2C GLUE ****************************/ + +static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control, +					       uint8_t address, +					       uint8_t *data, +					       uint32_t numbytes) +{ +	uint32_t  ret = 0; + +	/* First 2 bytes are dummy write to set EEPROM address */ +	ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); +	if (ret != I2C_OK) +		goto Fail; + +	/* Now read data starting with that address */ +	ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, +				    I2C_RESTART); + +Fail: +	if (ret != I2C_OK) +		DRM_ERROR("ReadData() - I2C error occurred :%x", ret); + +	return ret; +} + +static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, +						uint8_t address, +						uint8_t *data, +						uint32_t numbytes) +{ +	uint32_t  ret; + +	ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + +	if (ret != I2C_OK) +		DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); +	else +		/* +		 * According to EEPROM spec there is a MAX of 10 ms required for +		 * EEPROM to flush internal RX buffer after STOP was issued at the +		 * end of write transaction. During this time the EEPROM will not be +		 * responsive to any more commands - so wait a bit more. +		 * +		 * TODO Improve to wait for first ACK for slave address after +		 * internal write cycle done. 
+		 */ +		msleep(10); + +	return ret; + +} + +static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ +	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + +	if (!smu_v11_0_i2c_bus_lock(i2c)) { +		DRM_ERROR("Failed to lock the bus from SMU"); +		return; +	} + +	control->bus_locked = true; +} + +static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ +	WARN_ONCE(1, "This operation not supposed to run in atomic context!"); +	return false; +} + +static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ +	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + +	if (!smu_v11_0_i2c_bus_unlock(i2c)) { +		DRM_ERROR("Failed to unlock the bus from SMU"); +		return; +	} + +	control->bus_locked = false; +} + +static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { +	.lock_bus = lock_bus, +	.trylock_bus = trylock_bus, +	.unlock_bus = unlock_bus, +}; + +static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, +			      struct i2c_msg *msgs, int num) +{ +	int i, ret; +	struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap); + +	if (!control->bus_locked) { +		DRM_ERROR("I2C bus unlocked, stopping transaction!"); +		return -EIO; +	} + +	smu_v11_0_i2c_init(i2c_adap); + +	for (i = 0; i < num; i++) { +		if (msgs[i].flags & I2C_M_RD) +			ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap, +							(uint8_t)msgs[i].addr, +							msgs[i].buf, msgs[i].len); +		else +			ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap, +							 (uint8_t)msgs[i].addr, +							 msgs[i].buf, msgs[i].len); + +		if (ret != I2C_OK) { +			num = -EIO; +			break; +		} +	} + +	smu_v11_0_i2c_fini(i2c_adap); +	return num; +} + +static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap) +{ +	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + + +static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = { +	.master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer, +	.functionality = smu_v11_0_i2c_eeprom_i2c_func, +}; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) +{ +	struct amdgpu_device *adev = to_amdgpu_device(control); +	int res; + +	control->owner = THIS_MODULE; +	control->class = I2C_CLASS_SPD; +	control->dev.parent = &adev->pdev->dev; +	control->algo = &smu_v11_0_i2c_eeprom_i2c_algo; +	snprintf(control->name, sizeof(control->name), "RAS EEPROM"); +	control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + +	res = i2c_add_adapter(control); +	if (res) +		DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + +	return res; +} + +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control) +{ +	i2c_del_adapter(control); +} + +/* + * Keep this for future unit test if bugs arise + */ +#if 0 +#define I2C_TARGET_ADDR 0xA0 + +bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) +{ + +	uint32_t ret = I2C_OK; +	uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef}; + + +	DRM_INFO("Begin"); + +	if (!smu_v11_0_i2c_bus_lock(control)) { +		DRM_ERROR("Failed to lock the bus!."); +		return false; +	} + +	smu_v11_0_i2c_init(control); + +	/* Write 0xde to address 0x0000 on the EEPROM */ +	ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6); + +	ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6); + +	smu_v11_0_i2c_fini(control); + +	smu_v11_0_i2c_bus_unlock(control); + + +	DRM_INFO("End"); +	return true; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h new file mode 100644 index 000000000000..973f28d68e70 --- 
/dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h @@ -0,0 +1,34 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_V11_I2C_CONTROL_H +#define SMU_V11_I2C_CONTROL_H + +#include <linux/types.h> + +struct i2c_adapter; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control); +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 23265414d448..f70658a536a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -63,6 +63,8 @@  #include "uvd_v7_0.h"  #include "vce_v4_0.h"  #include "vcn_v1_0.h" +#include "vcn_v2_0.h" +#include "vcn_v2_5.h"  #include "dce_virtual.h"  #include "mxgpu_ai.h"  #include "amdgpu_smu.h" @@ -115,6 +117,49 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);  } +static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ +	unsigned long flags, address, data; +	u64 r; +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	/* read low 32 bit */ +	WREG32(address, reg); +	(void)RREG32(address); +	r = RREG32(data); + +	/* read high 32 bit*/ +	WREG32(address, reg + 4); +	(void)RREG32(address); +	r |= ((u64)RREG32(data) << 32); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +	return r; +} + +static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ +	unsigned long flags, address, data; + +	address = adev->nbio_funcs->get_pcie_index_offset(adev); +	data = adev->nbio_funcs->get_pcie_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	/* write low 32 bit */ +	WREG32(address, reg); +	(void)RREG32(address); +	WREG32(data, (u32)(v & 0xffffffffULL)); +	(void)RREG32(data); + +	/* write high 32 bit */ +	WREG32(address, reg + 4); +	(void)RREG32(address); +	WREG32(data, (u32)(v >> 32)); +	(void)RREG32(data); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} +  static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg)  {  	unsigned long flags, address, data; @@ -464,12 +509,23 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)  	return 0;  } -static int soc15_asic_reset(struct amdgpu_device *adev) +static int soc15_mode2_reset(struct 
amdgpu_device *adev) +{ +	if (!adev->powerplay.pp_funcs || +	    !adev->powerplay.pp_funcs->asic_reset_mode_2) +		return -ENOENT; + +	return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); +} + +static enum amd_reset_method +soc15_asic_reset_method(struct amdgpu_device *adev)  { -	int ret;  	bool baco_reset;  	switch (adev->asic_type) { +	case CHIP_RAVEN: +		return AMD_RESET_METHOD_MODE2;  	case CHIP_VEGA10:  	case CHIP_VEGA12:  		soc15_asic_get_baco_capability(adev, &baco_reset); @@ -493,11 +549,23 @@ static int soc15_asic_reset(struct amdgpu_device *adev)  	}  	if (baco_reset) -		ret = soc15_asic_baco_reset(adev); +		return AMD_RESET_METHOD_BACO;  	else -		ret = soc15_asic_mode1_reset(adev); +		return AMD_RESET_METHOD_MODE1; +} -	return ret; +static int soc15_asic_reset(struct amdgpu_device *adev) +{ +	switch (soc15_asic_reset_method(adev)) { +		case AMD_RESET_METHOD_BACO: +			amdgpu_inc_vram_lost(adev); +			return soc15_asic_baco_reset(adev); +		case AMD_RESET_METHOD_MODE2: +			return soc15_mode2_reset(adev); +		default: +			amdgpu_inc_vram_lost(adev); +			return soc15_asic_mode1_reset(adev); +	}  }  /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -581,26 +649,31 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  	case CHIP_VEGA10:  	case CHIP_VEGA12:  	case CHIP_RAVEN: +	case CHIP_RENOIR:  		vega10_reg_base_init(adev);  		break;  	case CHIP_VEGA20:  		vega20_reg_base_init(adev);  		break; +	case CHIP_ARCTURUS: +		arct_reg_base_init(adev); +		break;  	default:  		return -EINVAL;  	} -	if (adev->asic_type == CHIP_VEGA20) +	if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)  		adev->gmc.xgmi.supported = true;  	if (adev->flags & AMD_IS_APU)  		adev->nbio_funcs = &nbio_v7_0_funcs; -	else if (adev->asic_type == CHIP_VEGA20) +	else if (adev->asic_type == CHIP_VEGA20 || +		adev->asic_type == CHIP_ARCTURUS)  		adev->nbio_funcs = &nbio_v7_4_funcs;  	else  		adev->nbio_funcs = &nbio_v6_1_funcs; -	if (adev->asic_type == CHIP_VEGA20) +	if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS)  		adev->df_funcs = &df_v3_6_funcs;  	else  		adev->df_funcs = &df_v1_7_funcs; @@ -672,6 +745,37 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  #endif  		amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);  		break; +	case CHIP_ARCTURUS: +		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); +		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); +		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); +		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); +		amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +		amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); +		break; +	case CHIP_RENOIR: +		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); +		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); +		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) +			amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); +		if (is_support_sw_smu(adev)) +			amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); +		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); +		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); +		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if 
defined(CONFIG_DRM_AMD_DC) +                else if (amdgpu_device_has_dc_support(adev)) +                        amdgpu_device_ip_block_add(adev, &dm_ip_block); +#else +#       warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." +#endif +		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); +		break;  	default:  		return -EINVAL;  	} @@ -688,7 +792,7 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev,  				 struct amdgpu_ring *ring)  {  	if (!ring || !ring->funcs->emit_wreg) -		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); +		WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);  	else  		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(  			HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); @@ -714,14 +818,9 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,  	/* Set the 2 events that we wish to watch, defined above */  	/* Reg 40 is # received msgs */ +	/* Reg 104 is # of posted requests sent */  	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); -	/* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ -	if (adev->asic_type == CHIP_VEGA20) -		perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, -					EVENT1_SEL, 108); -	else -		perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, -					EVENT1_SEL, 104); +	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);  	/* Write to enable desired perf counters */  	WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); @@ -751,6 +850,55 @@ static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,  	*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);  } +static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, +				 uint64_t *count1) +{ +	uint32_t perfctr = 0; +	uint64_t cnt0_of, cnt1_of; +	int tmp; + +	/* This reports 0 on APUs, so return to avoid writing/reading registers +	 * that may or may not be different from their GPU counterparts +	 */ +	if (adev->flags & AMD_IS_APU) +		return; + +	/* Set the 2 events that we wish to watch, defined above */ +	/* Reg 40 is # received msgs */ +	/* Reg 108 is # of posted requests sent on VG20 */ +	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, +				EVENT0_SEL, 40); +	perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, +				EVENT1_SEL, 108); + +	/* Write to enable desired perf counters */ +	WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); +	/* Zero out and enable the perf counters +	 * Write 0x5: +	 * Bit 0 = Start all counters(1) +	 * Bit 2 = Global counter reset enable(1) +	 */ +	WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + +	msleep(1000); + +	/* Load the shadow and disable the perf counters +	 * Write 0x2: +	 * Bit 0 = Stop counters(0) +	 * Bit 1 = Load the shadow counters(1) +	 */ +	WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + +	/* Read register values to get any >32bit overflow */ +	tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); +	cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); +	cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); + +	/* Get the values and add the overflow */ +	*count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); +	*count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); +} +  static bool soc15_need_reset_on_init(struct amdgpu_device *adev)  {  	u32 sol_reg; @@ -792,6 +940,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =  	.read_bios_from_rom = &soc15_read_bios_from_rom,  	.read_register = 
&soc15_read_register,  	.reset = &soc15_asic_reset, +	.reset_method = &soc15_asic_reset_method,  	.set_vga_state = &soc15_vga_set_state,  	.get_xclk = &soc15_get_xclk,  	.set_uvd_clocks = &soc15_set_uvd_clocks, @@ -821,9 +970,10 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =  	.invalidate_hdp = &soc15_invalidate_hdp,  	.need_full_reset = &soc15_need_full_reset,  	.init_doorbell_index = &vega20_doorbell_index_init, -	.get_pcie_usage = &soc15_get_pcie_usage, +	.get_pcie_usage = &vega20_get_pcie_usage,  	.need_reset_on_init = &soc15_need_reset_on_init,  	.get_pcie_replay_count = &soc15_get_pcie_replay_count, +	.reset_method = &soc15_asic_reset_method  };  static int soc15_common_early_init(void *handle) @@ -837,6 +987,8 @@ static int soc15_common_early_init(void *handle)  	adev->smc_wreg = NULL;  	adev->pcie_rreg = &soc15_pcie_rreg;  	adev->pcie_wreg = &soc15_pcie_wreg; +	adev->pcie_rreg64 = &soc15_pcie_rreg64; +	adev->pcie_wreg64 = &soc15_pcie_wreg64;  	adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg;  	adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg;  	adev->didt_rreg = &soc15_didt_rreg; @@ -992,6 +1144,48 @@ static int soc15_common_early_init(void *handle)  			adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;  		} +		break; +	case CHIP_ARCTURUS: +		adev->asic_funcs = &vega20_asic_funcs; +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_MGLS | +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_GFX_CGLS | +			AMD_CG_SUPPORT_GFX_CP_LS | +			AMD_CG_SUPPORT_HDP_MGCG | +			AMD_CG_SUPPORT_HDP_LS | +			AMD_CG_SUPPORT_SDMA_MGCG | +			AMD_CG_SUPPORT_SDMA_LS | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS; +		adev->pg_flags = 0; +		adev->external_rev_id = adev->rev_id + 0x32; +		break; +	case CHIP_RENOIR: +		adev->asic_funcs = &soc15_asic_funcs; +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +				 AMD_CG_SUPPORT_GFX_MGLS | +				 AMD_CG_SUPPORT_GFX_3D_CGCG | +				 AMD_CG_SUPPORT_GFX_3D_CGLS | +				 AMD_CG_SUPPORT_GFX_CGCG | +				 AMD_CG_SUPPORT_GFX_CGLS | +				 AMD_CG_SUPPORT_GFX_CP_LS | +				 AMD_CG_SUPPORT_MC_MGCG | +				 AMD_CG_SUPPORT_MC_LS | +				 AMD_CG_SUPPORT_SDMA_MGCG | +				 AMD_CG_SUPPORT_SDMA_LS | +				 AMD_CG_SUPPORT_BIF_LS | +				 AMD_CG_SUPPORT_HDP_LS | +				 AMD_CG_SUPPORT_ROM_MGCG | +				 AMD_CG_SUPPORT_VCN_MGCG | +				 AMD_CG_SUPPORT_IH_CG | +				 AMD_CG_SUPPORT_ATHUB_LS | +				 AMD_CG_SUPPORT_ATHUB_MGCG | +				 AMD_CG_SUPPORT_DF_MGCG; +		adev->pg_flags = AMD_PG_SUPPORT_SDMA | +				 AMD_PG_SUPPORT_VCN | +				 AMD_PG_SUPPORT_VCN_DPG; +		adev->external_rev_id = adev->rev_id + 0x91;  		if (adev->pm.pp_feature & PP_GFXOFF_MASK)  			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | @@ -1043,21 +1237,18 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)  	int i;  	struct amdgpu_ring *ring; -	/*  Two reasons to skip -	*		1, Host driver already programmed them -	*		2, To avoid registers program violations in SR-IOV -	*/ -	if (!amdgpu_virt_support_skip_setting(adev)) { +	/* sdma/ih doorbell range are programed by hypervisor */ +	if (!amdgpu_sriov_vf(adev)) {  		for (i = 0; i < adev->sdma.num_instances; i++) {  			ring = &adev->sdma.instance[i].ring;  			adev->nbio_funcs->sdma_doorbell_range(adev, i,  				ring->use_doorbell, ring->doorbell_index,  				adev->doorbell_index.sdma_doorbell_range);  		} -	} -	adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, +		adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,  						adev->irq.ih.doorbell_index); +	}  }  static int soc15_common_hw_init(void *handle) @@ -1134,7 +1325,8 
@@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable  {  	uint32_t def, data; -	if (adev->asic_type == CHIP_VEGA20) { +	if (adev->asic_type == CHIP_VEGA20 || +		adev->asic_type == CHIP_ARCTURUS) {  		def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL));  		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -1253,6 +1445,7 @@ static int soc15_common_set_clockgating_state(void *handle,  				state == AMD_CG_STATE_GATE ? true : false);  		break;  	case CHIP_RAVEN: +	case CHIP_RENOIR:  		adev->nbio_funcs->update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		adev->nbio_funcs->update_medium_grain_light_sleep(adev, @@ -1266,6 +1459,10 @@ static int soc15_common_set_clockgating_state(void *handle,  		soc15_update_rom_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		break; +	case CHIP_ARCTURUS: +		soc15_update_hdp_light_sleep(adev, +				state == AMD_CG_STATE_GATE ? true : false); +		break;  	default:  		break;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 7a6b2cc6d9f5..a3dde0c31f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -77,6 +77,7 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,  int vega10_reg_base_init(struct amdgpu_device *adev);  int vega20_reg_base_init(struct amdgpu_device *adev); +int arct_reg_base_init(struct amdgpu_device *adev);  void vega10_doorbell_index_init(struct amdgpu_device *adev);  void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 47f74dab365d..839f186e1182 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,9 +69,10 @@  		}						\  	} while (0) +#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))  #define WREG32_RLC(reg, value) \  	do {							\ -		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \ +		if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) {    \  			uint32_t i = 0;	\  			uint32_t retries = 50000;	\  			uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;	\ @@ -96,7 +97,7 @@  #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \  	do {							\  		uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ -		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \ +		if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) {    \  			uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;	\  			uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;	\  			uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c new file mode 100644 index 000000000000..8502e736f721 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -0,0 +1,255 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" + +#define smnMCA_UMC0_MCUMC_ADDRT0	0x50f10 + +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr)		(((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index)	((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr)		((addr) & 0xffULL) + +const uint32_t +	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { +		{2, 18, 11, 27},	{4, 20, 13, 29}, +		{1, 17, 8, 24},		{7, 23, 14, 30}, +		{10, 26, 3, 19},	{12, 28, 5, 21}, +		{9, 25, 0, 16},		{15, 31, 6, 22} +}; + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, +					   uint32_t umc_instance) +{ +	uint32_t rsmu_umc_index; + +	rsmu_umc_index = RREG32_SOC15(RSMU, 0, +			mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); +	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, +			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, +			RSMU_UMC_INDEX_MODE_EN, 1); +	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, +			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, +			RSMU_UMC_INDEX_INSTANCE, umc_instance); +	rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, +			RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, +			RSMU_UMC_INDEX_WREN, 1 << umc_instance); +	WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, +				rsmu_umc_index); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ +	WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, +			RSMU_UMC_INDEX_MODE_EN, 0); +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, +						   uint32_t umc_reg_offset, +						   unsigned long *error_count) +{ +	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; +	uint32_t ecc_err_cnt, ecc_err_cnt_addr; +	uint64_t mc_umc_status; +	uint32_t mc_umc_status_addr; + +	ecc_err_cnt_sel_addr = +		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); +	ecc_err_cnt_addr = +		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); +	mc_umc_status_addr = +		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + +	/* select the lower chip and check the error count */ +	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); +	
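/*
 * Note (illustrative, not part of this patch): the REG_SET_FIELD()/
 * REG_GET_FIELD() calls in this function are the driver's usual
 * read-modify-write helpers. With the mask/shift pairs from the generated
 * umc_6_1_1_sh_mask.h header, selecting the chip via EccErrCntCsSel expands
 * to roughly:
 *
 *	val &= ~UMCCH0_0_EccErrCntSel__EccErrCntCsSel_MASK;
 *	val |= (sel << UMCCH0_0_EccErrCntSel__EccErrCntCsSel__SHIFT) &
 *	       UMCCH0_0_EccErrCntSel__EccErrCntCsSel_MASK;
 *
 * i.e. only the named field changes; the rest of the register is preserved.
 */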
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, +					EccErrCntCsSel, 0); +	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); +	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); +	*error_count += +		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - +		 UMC_V6_1_CE_CNT_INIT); +	/* clear the lower chip err count */ +	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + +	/* select the higher chip and check the err counter */ +	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, +					EccErrCntCsSel, 1); +	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); +	ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); +	*error_count += +		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - +		 UMC_V6_1_CE_CNT_INIT); +	/* clear the higher chip err count */ +	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + +	/* check for SRAM correctable error +	  MCUMC_STATUS is a 64 bit register */ +	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); +	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) +		*error_count += 1; +} + +static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev, +						      uint32_t umc_reg_offset, +						      unsigned long *error_count) +{ +	uint64_t mc_umc_status; +	uint32_t mc_umc_status_addr; + +	mc_umc_status_addr = +                SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + +	/* check the MCUMC_STATUS */ +	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); +	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && +	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) +		*error_count += 1; +} + +static void umc_v6_1_query_error_count(struct amdgpu_device *adev, +					   struct ras_err_data *err_data, uint32_t umc_reg_offset, +					   uint32_t channel_index) +{ +	umc_v6_1_query_correctable_error_count(adev, umc_reg_offset, +						   &(err_data->ce_count)); +	umc_v6_1_querry_uncorrectable_error_count(adev, umc_reg_offset, +						  &(err_data->ue_count)); +} + +static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, +					   void *ras_error_status) +{ +	amdgpu_umc_for_each_channel(umc_v6_1_query_error_count); +} + +static void umc_v6_1_query_error_address(struct amdgpu_device *adev, +					 struct ras_err_data *err_data, +					 uint32_t umc_reg_offset, uint32_t channel_index) +{ +	uint32_t lsb, mc_umc_status_addr; +	uint64_t mc_umc_status, err_addr; + +	mc_umc_status_addr = +		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + +	/* skip error address process if -ENOMEM */ +	if (!err_data->err_addr) { +		/* clear umc status */ +		WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); +		return; +	} + +	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + +	/* calculate error address if ue/ce error is detected */ +	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && +	    
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || +	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { +		err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4); + +		/* the lowest lsb bits should be ignored */ +		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); +		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); +		err_addr &= ~((0x1ULL << lsb) - 1); + +		/* translate umc channel address to soc pa, 3 parts are included */ +		err_data->err_addr[err_data->err_addr_cnt] = +						ADDR_OF_8KB_BLOCK(err_addr) | +						ADDR_OF_256B_BLOCK(channel_index) | +						OFFSET_IN_256B_BLOCK(err_addr); + +		err_data->err_addr_cnt++; +	} + +	/* clear umc status */ +	WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL); +} + +static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, +					     void *ras_error_status) +{ +	amdgpu_umc_for_each_channel(umc_v6_1_query_error_address); +} + +static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev, +					 struct ras_err_data *err_data, +					 uint32_t umc_reg_offset, uint32_t channel_index) +{ +	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; +	uint32_t ecc_err_cnt_addr; + +	ecc_err_cnt_sel_addr = +		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); +	ecc_err_cnt_addr = +		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + +	/* select the lower chip and check the error count */ +	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); +	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, +					EccErrCntCsSel, 0); +	/* set ce error interrupt type to APIC based interrupt */ +	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, +					EccErrInt, 0x1); +	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); +	/* set error count to initial value */ +	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + +	/* select the higher chip and check the err counter */ +	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, +					EccErrCntCsSel, 1); +	WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); +	WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_ras_init(struct amdgpu_device *adev) +{ +	void *ras_error_status = NULL; + +	amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { +	.ras_init = umc_v6_1_ras_init, +	.query_ras_error_count = umc_v6_1_query_ras_error_count, +	.query_ras_error_address = umc_v6_1_query_ras_error_address, +	.enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, +	.disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h new file mode 100644 index 000000000000..dab9cbd292c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -0,0 +1,51 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V6_1_H__ +#define __UMC_V6_1_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* HBM  Memory Channel Width */ +#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH	128 +/* number of umc channel instance with memory map register access */ +#define UMC_V6_1_CHANNEL_INSTANCE_NUM		4 +/* number of umc instance with memory map register access */ +#define UMC_V6_1_UMC_INSTANCE_NUM		8 +/* total channel instances in one umc block */ +#define UMC_V6_1_TOTAL_CHANNEL_NUM	(UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM) +/* UMC regiser per channel offset */ +#define UMC_V6_1_PER_CHANNEL_OFFSET		0x800 + +/* EccErrCnt max value */ +#define UMC_V6_1_CE_CNT_MAX		0xffff +/* umc ce interrupt threshold */ +#define UMC_V6_1_CE_INT_THRESHOLD	0xffff +/* umc ce count initial value */ +#define UMC_V6_1_CE_CNT_INIT	(UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) + +extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const uint32_t +	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index a6bfe7651d07..01f658fa72c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1763,7 +1763,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {  	.align_mask = 0xf,  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = uvd_v7_0_ring_get_rptr,  	.get_wptr = uvd_v7_0_ring_get_wptr,  	.set_wptr = uvd_v7_0_ring_set_wptr, @@ -1796,7 +1796,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {  	.nop = HEVC_ENC_CMD_NO_OP,  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = uvd_v7_0_enc_ring_get_rptr,  	.get_wptr = uvd_v7_0_enc_ring_get_wptr,  	.set_wptr = uvd_v7_0_enc_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index eafbe8d8248d..683701cf7270 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1070,7 +1070,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {  	.nop = VCE_CMD_NO_OP,  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vce_v4_0_ring_get_rptr,  	.get_wptr = 
vce_v4_0_ring_get_wptr,  	.set_wptr = vce_v4_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dde22b7d140d..93b3500e522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -63,6 +63,7 @@ static int vcn_v1_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->vcn.num_vcn_inst = 1;  	adev->vcn.num_enc_rings = 2;  	vcn_v1_0_set_dec_ring_funcs(adev); @@ -87,20 +88,21 @@ static int vcn_v1_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* VCN DEC TRAP */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq); +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +			VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq);  	if (r)  		return r;  	/* VCN ENC TRAP */  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {  		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, -					&adev->vcn.irq); +					&adev->vcn.inst->irq);  		if (r)  			return r;  	}  	/* VCN JPEG TRAP */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq);  	if (r)  		return r; @@ -122,39 +124,39 @@ static int vcn_v1_0_sw_init(void *handle)  	if (r)  		return r; -	ring = &adev->vcn.ring_dec; +	ring = &adev->vcn.inst->ring_dec;  	sprintf(ring->name, "vcn_dec"); -	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  	if (r)  		return r; -	adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = +	adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); -	adev->vcn.internal.data0 = adev->vcn.external.data0 = +	adev->vcn.internal.data0 = adev->vcn.inst->external.data0 =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); -	adev->vcn.internal.data1 = adev->vcn.external.data1 = +	adev->vcn.internal.data1 = adev->vcn.inst->external.data1 =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); -	adev->vcn.internal.cmd = adev->vcn.external.cmd = +	adev->vcn.internal.cmd = adev->vcn.inst->external.cmd =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); -	adev->vcn.internal.nop = adev->vcn.external.nop = +	adev->vcn.internal.nop = adev->vcn.inst->external.nop =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		ring = &adev->vcn.ring_enc[i]; +		ring = &adev->vcn.inst->ring_enc[i];  		sprintf(ring->name, "vcn_enc%d", i); -		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  		if (r)  			return r;  	} -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	sprintf(ring->name, "vcn_jpeg"); -	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  	if (r)  		return r;  	adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; -	adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = +	adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch =  		SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);  	return 0; @@ -191,7 +193,7 @@ static int vcn_v1_0_sw_fini(void *handle)  static int vcn_v1_0_hw_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	struct amdgpu_ring *ring = 
&adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	int i, r;  	r = amdgpu_ring_test_helper(ring); @@ -199,14 +201,14 @@ static int vcn_v1_0_hw_init(void *handle)  		goto done;  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		ring = &adev->vcn.ring_enc[i]; +		ring = &adev->vcn.inst->ring_enc[i];  		ring->sched.ready = true;  		r = amdgpu_ring_test_helper(ring);  		if (r)  			goto done;  	} -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	r = amdgpu_ring_test_helper(ring);  	if (r)  		goto done; @@ -229,7 +231,7 @@ done:  static int vcn_v1_0_hw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||  		RREG32_SOC15(VCN, 0, mmUVD_STATUS)) @@ -304,9 +306,9 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)  		offset = 0;  	} else {  		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, -			lower_32_bits(adev->vcn.gpu_addr)); +			lower_32_bits(adev->vcn.inst->gpu_addr));  		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, -			upper_32_bits(adev->vcn.gpu_addr)); +			upper_32_bits(adev->vcn.inst->gpu_addr));  		offset = size;  		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,  			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -316,17 +318,17 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)  	/* cache window 1: stack */  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, -		     lower_32_bits(adev->vcn.gpu_addr + offset)); +		     lower_32_bits(adev->vcn.inst->gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, -		     upper_32_bits(adev->vcn.gpu_addr + offset)); +		     upper_32_bits(adev->vcn.inst->gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);  	/* cache window 2: context */  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, -		     lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		     lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, -		     upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		     upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -374,9 +376,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)  		offset = 0;  	} else {  		WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, -			lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); +			lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);  		WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, -			upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); +			upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0);  		offset = size;  		WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,  			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); @@ -386,9 +388,9 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)  	/* cache window 1: stack */  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, -		     lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); +		     lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);  	
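The recurring change in these hunks is mechanical: every per-engine field that used to live directly in adev->vcn (gpu_addr, ring_dec, ring_enc[], ring_jpeg, irq, and the external register offsets) is now reached through a per-instance element, so single-instance parts such as VCN 1.0 and 2.0 address instance 0 via adev->vcn.inst->, while the VCN 2.5 code added later in this patch indexes adev->vcn.inst[i]. A rough sketch of the assumed layout, with field and type names inferred from the accesses visible in this diff rather than copied from amdgpu_vcn.h:

	/* Illustrative sketch only -- not the actual amdgpu_vcn.h definitions. */
	struct amdgpu_vcn_inst {
		uint64_t		gpu_addr;	/* fw/stack/context BO address for this instance */
		struct amdgpu_ring	ring_dec;
		struct amdgpu_ring	ring_enc[2];	/* sized by the driver's enc-ring limit */
		struct amdgpu_ring	ring_jpeg;
		struct amdgpu_irq_src	irq;
		struct amdgpu_vcn_reg	external;	/* per-instance MMIO offsets: scratch9, data0, ... */
	};

	struct amdgpu_vcn {
		unsigned		num_vcn_inst;	/* 1 on VCN 1.0/2.0, up to 2 on Arcturus */
		unsigned		num_enc_rings;
		unsigned		harvest_config;	/* bit i set when instance i is fused off */
		struct amdgpu_vcn_reg	internal;	/* register offsets shared by all instances */
		struct amdgpu_vcn_inst	inst[2];	/* adev->vcn.inst-> is shorthand for inst[0] */
		/* fw, pause_dpg_mode, ... unchanged by this conversion */
	};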
WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, -		     upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); +		     upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0);  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,  			     0xFFFFFFFF, 0);  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, @@ -396,10 +398,10 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)  	/* cache window 2: context */  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, -		     lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), +		     lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),  			     0xFFFFFFFF, 0);  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, -		     upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), +		     upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),  			     0xFFFFFFFF, 0);  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);  	WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, @@ -779,7 +781,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)   */  static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	uint32_t rb_bufsz, tmp;  	uint32_t lmi_swap_cntl;  	int i, j, r; @@ -932,21 +934,21 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)  	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,  			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); -	ring = &adev->vcn.ring_enc[0]; +	ring = &adev->vcn.inst->ring_enc[0];  	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); -	ring = &adev->vcn.ring_enc[1]; +	ring = &adev->vcn.inst->ring_enc[1];  	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);  	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |  			UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); @@ -968,7 +970,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)  static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	uint32_t rb_bufsz, tmp;  	uint32_t lmi_swap_cntl; @@ -1106,7 +1108,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)  			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);  	/* initialize JPEG wptr */ -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);  	/* copy patch commands to the jpeg ring */ @@ -1255,21 +1257,21 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,  						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);  				/* Restore */ -				ring = 
&adev->vcn.ring_enc[0]; +				ring = &adev->vcn.inst->ring_enc[0];  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);  				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); -				ring = &adev->vcn.ring_enc[1]; +				ring = &adev->vcn.inst->ring_enc[1];  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);  				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); -				ring = &adev->vcn.ring_dec; +				ring = &adev->vcn.inst->ring_dec;  				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,  						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);  				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1315,7 +1317,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,  							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);  				/* Restore */ -				ring = &adev->vcn.ring_jpeg; +				ring = &adev->vcn.inst->ring_jpeg;  				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);  				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,  							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1329,7 +1331,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,  				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,  							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); -				ring = &adev->vcn.ring_dec; +				ring = &adev->vcn.inst->ring_dec;  				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,  						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);  				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1596,7 +1598,7 @@ static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) +	if (ring == &adev->vcn.inst->ring_enc[0])  		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);  	else  		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1613,7 +1615,7 @@ static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) +	if (ring == &adev->vcn.inst->ring_enc[0])  		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);  	else  		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); @@ -1630,7 +1632,7 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) +	if (ring == &adev->vcn.inst->ring_enc[0])  		WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,  			lower_32_bits(ring->wptr));  	else @@ -2114,16 +2116,16 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,  	switch (entry->src_id) {  	case 124: -		amdgpu_fence_process(&adev->vcn.ring_dec); +		amdgpu_fence_process(&adev->vcn.inst->ring_dec);  		break;  	case 119: -		amdgpu_fence_process(&adev->vcn.ring_enc[0]); +		amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);  		break;  	case 120: -		amdgpu_fence_process(&adev->vcn.ring_enc[1]); +		amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);  		break;  	case 126: -		amdgpu_fence_process(&adev->vcn.ring_jpeg); +		amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);  		break;  	default:  		DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2198,7 +2200,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {  	.align_mask 
= 0xf,  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vcn_v1_0_dec_ring_get_rptr,  	.get_wptr = vcn_v1_0_dec_ring_get_wptr,  	.set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2232,7 +2234,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {  	.nop = VCN_ENC_CMD_NO_OP,  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vcn_v1_0_enc_ring_get_rptr,  	.get_wptr = vcn_v1_0_enc_ring_get_wptr,  	.set_wptr = vcn_v1_0_enc_ring_set_wptr, @@ -2264,7 +2266,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {  	.nop = PACKET0(0x81ff, 0),  	.support_64bit_ptrs = false,  	.no_user_fence = true, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.extra_dw = 64,  	.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,  	.get_wptr = vcn_v1_0_jpeg_ring_get_wptr, @@ -2295,7 +2297,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {  static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)  { -	adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; +	adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;  	DRM_INFO("VCN decode is enabled in VM mode\n");  } @@ -2304,14 +2306,14 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) -		adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; +		adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;  	DRM_INFO("VCN encode is enabled in VM mode\n");  }  static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)  { -	adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; +	adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;  	DRM_INFO("VCN jpeg decode is enabled in VM mode\n");  } @@ -2322,8 +2324,8 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)  { -	adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; -	adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; +	adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; +	adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs;  }  const struct amdgpu_ip_block_version vcn_v1_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 988c0adaca91..36ad0c0e8efb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -22,7 +22,7 @@   */  #include <linux/firmware.h> -#include <drm/drmP.h> +  #include "amdgpu.h"  #include "amdgpu_vcn.h"  #include "soc15.h" @@ -92,6 +92,7 @@ static int vcn_v2_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->vcn.num_vcn_inst = 1;  	adev->vcn.num_enc_rings = 2;  	vcn_v2_0_set_dec_ring_funcs(adev); @@ -118,7 +119,7 @@ static int vcn_v2_0_sw_init(void *handle)  	/* VCN DEC TRAP */  	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,  			      VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, -			      &adev->vcn.irq); +			      &adev->vcn.inst->irq);  	if (r)  		return r; @@ -126,15 +127,14 @@ static int vcn_v2_0_sw_init(void *handle)  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {  		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,  				      i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, -				      &adev->vcn.irq); +				      &adev->vcn.inst->irq);  		if (r)  			return r;  	}  	/* VCN JPEG TRAP */  	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, -			      
VCN_2_0__SRCID__JPEG_DECODE, -			      &adev->vcn.irq); +			      VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq);  	if (r)  		return r; @@ -156,49 +156,56 @@ static int vcn_v2_0_sw_init(void *handle)  	if (r)  		return r; -	ring = &adev->vcn.ring_dec; +	ring = &adev->vcn.inst->ring_dec;  	ring->use_doorbell = true;  	ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1;  	sprintf(ring->name, "vcn_dec"); -	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  	if (r)  		return r; +	adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; +	adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; +	adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; +	adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; +	adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; +	adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; +  	adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; -	adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); +	adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9);  	adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; -	adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); +	adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0);  	adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; -	adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); +	adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1);  	adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; -	adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); +	adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD);  	adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; -	adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); +	adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP);  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		ring = &adev->vcn.ring_enc[i]; +		ring = &adev->vcn.inst->ring_enc[i];  		ring->use_doorbell = true;  		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i;  		sprintf(ring->name, "vcn_enc%d", i); -		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  		if (r)  			return r;  	} -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	ring->use_doorbell = true;  	ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1;  	sprintf(ring->name, "vcn_jpeg"); -	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0);  	if (r)  		return r;  	adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode;  	adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; -	adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); +	adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH);  	return 0;  } @@ -234,11 +241,11 @@ static int vcn_v2_0_sw_fini(void *handle)  static int vcn_v2_0_hw_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	int i, r;  	adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, -		
ring->doorbell_index); +					     ring->doorbell_index, 0);  	ring->sched.ready = true;  	r = amdgpu_ring_test_ring(ring); @@ -248,7 +255,7 @@ static int vcn_v2_0_hw_init(void *handle)  	}  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		ring = &adev->vcn.ring_enc[i]; +		ring = &adev->vcn.inst->ring_enc[i];  		ring->sched.ready = true;  		r = amdgpu_ring_test_ring(ring);  		if (r) { @@ -257,7 +264,7 @@ static int vcn_v2_0_hw_init(void *handle)  		}  	} -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	ring->sched.ready = true;  	r = amdgpu_ring_test_ring(ring);  	if (r) { @@ -283,7 +290,7 @@ done:  static int vcn_v2_0_hw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	int i;  	if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || @@ -294,11 +301,11 @@ static int vcn_v2_0_hw_fini(void *handle)  	ring->sched.ready = false;  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		ring = &adev->vcn.ring_enc[i]; +		ring = &adev->vcn.inst->ring_enc[i];  		ring->sched.ready = false;  	} -	ring = &adev->vcn.ring_jpeg; +	ring = &adev->vcn.inst->ring_jpeg;  	ring->sched.ready = false;  	return 0; @@ -368,32 +375,29 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)  		offset = 0;  	} else {  		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, -			lower_32_bits(adev->vcn.gpu_addr)); +			lower_32_bits(adev->vcn.inst->gpu_addr));  		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, -			upper_32_bits(adev->vcn.gpu_addr)); +			upper_32_bits(adev->vcn.inst->gpu_addr));  		offset = size; -		/* No signed header for now from firmware  		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,  			AMDGPU_UVD_FIRMWARE_OFFSET >> 3); -		*/ -		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);  	}  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);  	/* cache window 1: stack */  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, -		lower_32_bits(adev->vcn.gpu_addr + offset)); +		lower_32_bits(adev->vcn.inst->gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, -		upper_32_bits(adev->vcn.gpu_addr + offset)); +		upper_32_bits(adev->vcn.inst->gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);  	/* cache window 2: context */  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, -		lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, -		upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -429,10 +433,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec  	} else {  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), -			lower_32_bits(adev->vcn.gpu_addr), 0, indirect); +			lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), -			upper_32_bits(adev->vcn.gpu_addr), 0, indirect); +			upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect);  		offset = 
size;  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), @@ -450,10 +454,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec  	if (!indirect) {  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), -			lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); +			lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), -			upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); +			upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect);  		WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  			UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);  	} else { @@ -470,10 +474,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec  	/* cache window 2: context */  	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), -		lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); +		lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);  	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  		UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), -		upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); +		upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);  	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0(  		UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);  	WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( @@ -661,7 +665,7 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,   */  static int jpeg_v2_0_start(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg;  	uint32_t tmp;  	int r = 0; @@ -923,7 +927,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev)  static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)  { -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	uint32_t rb_bufsz, tmp;  	vcn_v2_0_enable_static_power_gating(adev); @@ -1049,7 +1053,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)  static int vcn_v2_0_start(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring = &adev->vcn.ring_dec; +	struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec;  	uint32_t rb_bufsz, tmp;  	uint32_t lmi_swap_cntl;  	int i, j, r; @@ -1200,14 +1204,14 @@ static int vcn_v2_0_start(struct amdgpu_device *adev)  	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,  			lower_32_bits(ring->wptr)); -	ring = &adev->vcn.ring_enc[0]; +	ring = &adev->vcn.inst->ring_enc[0];  	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); -	ring = &adev->vcn.ring_enc[1]; +	ring = &adev->vcn.inst->ring_enc[1];  	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1354,14 +1358,14 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device 
*adev,  					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);  				/* Restore */ -				ring = &adev->vcn.ring_enc[0]; +				ring = &adev->vcn.inst->ring_enc[0];  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);  				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); -				ring = &adev->vcn.ring_enc[1]; +				ring = &adev->vcn.inst->ring_enc[1];  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);  				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1483,12 +1487,14 @@ static void vcn_v2_0_dec_ring_set_wptr(struct amdgpu_ring *ring)   *   * Write a start command to the ring.   */ -static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)  { -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));  	amdgpu_ring_write(ring, 0); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));  }  /** @@ -1498,10 +1504,12 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)   *   * Write a end command to the ring.   */ -static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)  { -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));  }  /** @@ -1511,14 +1519,15 @@ static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)   *   * Write a nop command to the ring.   */ -static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)  { +	struct amdgpu_device *adev = ring->adev;  	int i;  	WARN_ON(ring->wptr % 2 || count % 2);  	for (i = 0; i < count / 2; i++) { -		amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); +		amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0));  		amdgpu_ring_write(ring, 0);  	}  } @@ -1531,32 +1540,33 @@ static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun   *   * Write a fence and a trap command to the ring.   
*/ -static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, -				     unsigned flags) +void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +				unsigned flags)  { -	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); +	struct amdgpu_device *adev = ring->adev; -	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); +	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0));  	amdgpu_ring_write(ring, seq); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));  	amdgpu_ring_write(ring, addr & 0xffffffff); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));  	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));  	amdgpu_ring_write(ring, 0); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));  	amdgpu_ring_write(ring, 0); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));  }  /** @@ -1567,44 +1577,46 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64   *   * Write ring commands to execute the indirect buffer   */ -static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, -				      struct amdgpu_job *job, -				      struct amdgpu_ib *ib, -				      uint32_t flags) +void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, +			       struct amdgpu_job *job, +			       struct amdgpu_ib *ib, +			       uint32_t flags)  { +	struct amdgpu_device *adev = ring->adev;  	unsigned vmid = AMDGPU_JOB_GET_VMID(job); -	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0));  	amdgpu_ring_write(ring, vmid); -	amdgpu_ring_write(ring,	PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring,	PACKET0(adev->vcn.internal.ib_bar_low, 0));  	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); -	amdgpu_ring_write(ring,	PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring,	PACKET0(adev->vcn.internal.ib_bar_high, 0));  	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); -	amdgpu_ring_write(ring,	PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring,	PACKET0(adev->vcn.internal.ib_size, 0));  	amdgpu_ring_write(ring, ib->length_dw);  } -static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, -					    uint32_t reg, uint32_t val, -					    uint32_t mask) +void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask)  { -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); +	struct amdgpu_device *adev = ring->adev; + +	
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));  	amdgpu_ring_write(ring, reg << 2); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));  	amdgpu_ring_write(ring, val); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0));  	amdgpu_ring_write(ring, mask); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));  } -static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, -					    unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned vmid, uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];  	uint32_t data0, data1, mask; @@ -1618,18 +1630,20 @@ static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,  	vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);  } -static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, -					uint32_t reg, uint32_t val) +void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, +				uint32_t reg, uint32_t val)  { -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));  	amdgpu_ring_write(ring, reg << 2); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));  	amdgpu_ring_write(ring, val); -	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); -	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); +	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));  }  /** @@ -1643,7 +1657,7 @@ static uint64_t vcn_v2_0_enc_ring_get_rptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) +	if (ring == &adev->vcn.inst->ring_enc[0])  		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);  	else  		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1660,7 +1674,7 @@ static uint64_t vcn_v2_0_enc_ring_get_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) { +	if (ring == &adev->vcn.inst->ring_enc[0]) {  		if (ring->use_doorbell)  			return adev->wb.wb[ring->wptr_offs];  		else @@ -1684,7 +1698,7 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vcn.ring_enc[0]) { +	if (ring == &adev->vcn.inst->ring_enc[0]) {  		if (ring->use_doorbell) {  			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);  			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1709,8 +1723,8 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)   *   * Write enc a fence and a trap command to the ring.   
*/ -static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, -			u64 seq, unsigned flags) +void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, +				u64 seq, unsigned flags)  {  	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1721,7 +1735,7 @@ static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,  	amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);  } -static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)  {  	amdgpu_ring_write(ring, VCN_ENC_CMD_END);  } @@ -1734,10 +1748,10 @@ static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)   *   * Write enc ring commands to execute the indirect buffer   */ -static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, -				      struct amdgpu_job *job, -				      struct amdgpu_ib *ib, -				      uint32_t flags) +void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, +			       struct amdgpu_job *job, +			       struct amdgpu_ib *ib, +			       uint32_t flags)  {  	unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1748,9 +1762,8 @@ static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, ib->length_dw);  } -static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, -					    uint32_t reg, uint32_t val, -					    uint32_t mask) +void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask)  {  	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);  	amdgpu_ring_write(ring, reg << 2); @@ -1758,8 +1771,8 @@ static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, val);  } -static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, -					    unsigned int vmid, uint64_t pd_addr) +void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned int vmid, uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; @@ -1770,8 +1783,7 @@ static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,  					lower_32_bits(pd_addr), 0xffffffff);  } -static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, -					uint32_t reg, uint32_t val) +void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)  {  	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);  	amdgpu_ring_write(ring,	reg << 2); @@ -1835,7 +1847,7 @@ static void vcn_v2_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)   *   * Write a start command to the ring.   */ -static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)  {  	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,  		0, 0, PACKETJ_TYPE0)); @@ -1853,7 +1865,7 @@ static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)   *   * Write a end command to the ring.   */ -static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)  {  	amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,  		0, 0, PACKETJ_TYPE0)); @@ -1872,8 +1884,8 @@ static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)   *   * Write a fence and a trap command to the ring.   
*/ -static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, -				     unsigned flags) +void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +				unsigned flags)  {  	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1921,10 +1933,10 @@ static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6   *   * Write ring commands to execute the indirect buffer.   */ -static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, -				       struct amdgpu_job *job, -				       struct amdgpu_ib *ib, -				       uint32_t flags) +void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, +				struct amdgpu_job *job, +				struct amdgpu_ib *ib, +				uint32_t flags)  {  	unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1972,9 +1984,8 @@ static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, 0x2);  } -static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, -					    uint32_t reg, uint32_t val, -					    uint32_t mask) +void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask)  {  	uint32_t reg_offset = (reg << 2); @@ -2000,8 +2011,8 @@ static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, mask);  } -static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, -		unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned vmid, uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];  	uint32_t data0, data1, mask; @@ -2015,8 +2026,7 @@ static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,  	vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);  } -static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, -					uint32_t reg, uint32_t val) +void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)  {  	uint32_t reg_offset = (reg << 2); @@ -2034,7 +2044,7 @@ static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, val);  } -static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)  {  	int i; @@ -2062,16 +2072,16 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,  	switch (entry->src_id) {  	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: -		amdgpu_fence_process(&adev->vcn.ring_dec); +		amdgpu_fence_process(&adev->vcn.inst->ring_dec);  		break;  	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: -		amdgpu_fence_process(&adev->vcn.ring_enc[0]); +		amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]);  		break;  	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: -		amdgpu_fence_process(&adev->vcn.ring_enc[1]); +		amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]);  		break;  	case VCN_2_0__SRCID__JPEG_DECODE: -		amdgpu_fence_process(&adev->vcn.ring_jpeg); +		amdgpu_fence_process(&adev->vcn.inst->ring_jpeg);  		break;  	default:  		DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2082,6 +2092,36 @@ static int vcn_v2_0_process_interrupt(struct amdgpu_device *adev,  	return 0;  } +static int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t tmp = 0; +	unsigned i; +	int r; + +	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); +	r = amdgpu_ring_alloc(ring, 4); +	if (r) +		return r; +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); 
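	/* Ring liveness check added for VCN 2.0: the PACKET0 above targets the
	 * VCPU command register, the next dword issues VCN_DEC_CMD_PACKET_START
	 * tagged with VCN_DEC_KMD_CMD, and the pair after that writes 0xDEADBEEF
	 * into scratch9 through the ring.  The poll further down waits for the
	 * MMIO copy of scratch9 (seeded with 0xCAFEDEAD before the submission)
	 * to flip to 0xDEADBEEF, confirming the firmware consumed the packets. */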
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); +	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); +	amdgpu_ring_write(ring, 0xDEADBEEF); +	amdgpu_ring_commit(ring); +	for (i = 0; i < adev->usec_timeout; i++) { +		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); +		if (tmp == 0xDEADBEEF) +			break; +		udelay(1); +	} + +	if (i >= adev->usec_timeout) +		r = -ETIMEDOUT; + +	return r; +} + +  static int vcn_v2_0_set_powergating_state(void *handle,  					  enum amd_powergating_state state)  { @@ -2131,7 +2171,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {  static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_VCN_DEC,  	.align_mask = 0xf, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vcn_v2_0_dec_ring_get_rptr,  	.get_wptr = vcn_v2_0_dec_ring_get_wptr,  	.set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2145,7 +2185,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {  	.emit_ib = vcn_v2_0_dec_ring_emit_ib,  	.emit_fence = vcn_v2_0_dec_ring_emit_fence,  	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, -	.test_ring = amdgpu_vcn_dec_ring_test_ring, +	.test_ring = vcn_v2_0_dec_ring_test_ring,  	.test_ib = amdgpu_vcn_dec_ring_test_ib,  	.insert_nop = vcn_v2_0_dec_ring_insert_nop,  	.insert_start = vcn_v2_0_dec_ring_insert_start, @@ -2162,7 +2202,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_VCN_ENC,  	.align_mask = 0x3f,  	.nop = VCN_ENC_CMD_NO_OP, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vcn_v2_0_enc_ring_get_rptr,  	.get_wptr = vcn_v2_0_enc_ring_get_wptr,  	.set_wptr = vcn_v2_0_enc_ring_set_wptr, @@ -2191,7 +2231,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {  static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {  	.type = AMDGPU_RING_TYPE_VCN_JPEG,  	.align_mask = 0xf, -	.vmhub = AMDGPU_MMHUB, +	.vmhub = AMDGPU_MMHUB_0,  	.get_rptr = vcn_v2_0_jpeg_ring_get_rptr,  	.get_wptr = vcn_v2_0_jpeg_ring_get_wptr,  	.set_wptr = vcn_v2_0_jpeg_ring_set_wptr, @@ -2220,7 +2260,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {  static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev)  { -	adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; +	adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs;  	DRM_INFO("VCN decode is enabled in VM mode\n");  } @@ -2229,14 +2269,14 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) -		adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; +		adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs;  	DRM_INFO("VCN encode is enabled in VM mode\n");  }  static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)  { -	adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; +	adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs;  	DRM_INFO("VCN jpeg decode is enabled in VM mode\n");  } @@ -2247,8 +2287,8 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = {  static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev)  { -	adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; -	adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; +	adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; +	adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs;  }  const struct amdgpu_ip_block_version vcn_v2_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h index a74227f4663b..8467292f32e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h @@ -24,6 +24,44 @@  #ifndef __VCN_V2_0_H__  #define __VCN_V2_0_H__ +extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +				unsigned flags); +extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, +				struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask); +extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, +				uint32_t reg, uint32_t val); + +extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, +				u64 seq, unsigned flags); +extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, +				struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask); +extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned int vmid, uint64_t pd_addr); +extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + +extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +				unsigned flags); +extern void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, +				struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +				uint32_t val, uint32_t mask); +extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, +				unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); +  extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block;  #endif /* __VCN_V2_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c new file mode 100644 index 000000000000..395c2259f979 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -0,0 +1,1414 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> + +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "soc15.h" +#include "soc15d.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_2_5_offset.h" +#include "vcn/vcn_2_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27 +#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f +#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10 +#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11 +#define mmUVD_NO_OP_INTERNAL_OFFSET				0x29 +#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66 +#define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d + +#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x3b5 +#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c + +#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET			0x401f + +#define VCN25_MAX_HW_INSTANCES_ARCTURUS				2 + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev); +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_set_powergating_state(void *handle, +				enum amd_powergating_state state); + +static int amdgpu_ih_clientid_vcns[] = { +	SOC15_IH_CLIENTID_VCN, +	SOC15_IH_CLIENTID_VCN1 +}; + +/** + * vcn_v2_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int vcn_v2_5_early_init(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	if (adev->asic_type == CHIP_ARCTURUS) { +		u32 harvest; +		int i; + +		adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS; +		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +			harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING); +			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) +				adev->vcn.harvest_config |= 1 << i; +		} + +		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | +						 AMDGPU_VCN_HARVEST_VCN1)) +			/* both instances are harvested, disable the block */ +			return -ENOENT; +	} else +		adev->vcn.num_vcn_inst = 1; + +	adev->vcn.num_enc_rings = 2; + +	vcn_v2_5_set_dec_ring_funcs(adev); +	vcn_v2_5_set_enc_ring_funcs(adev); +	vcn_v2_5_set_jpeg_ring_funcs(adev); +	vcn_v2_5_set_irq_funcs(adev); + +	return 0; +} + +/** + * vcn_v2_5_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v2_5_sw_init(void *handle) +{ +	struct amdgpu_ring *ring; +	int i, j, r; +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	for (j = 0; j < adev->vcn.num_vcn_inst; j++) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		/* VCN DEC TRAP */ +		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], +				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq); +		if (r) +			return r; + +		/* VCN ENC TRAP */ +		for (i = 0; 
i < adev->vcn.num_enc_rings; ++i) { +			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], +				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); +			if (r) +				return r; +		} + +		/* VCN JPEG TRAP */ +		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], +				VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq); +		if (r) +			return r; +	} + +	r = amdgpu_vcn_sw_init(adev); +	if (r) +		return r; + +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +		const struct common_firmware_header *hdr; +		hdr = (const struct common_firmware_header *)adev->vcn.fw->data; +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; +		adev->firmware.fw_size += +			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + +		if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { +			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; +			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; +			adev->firmware.fw_size += +				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); +		} +		DRM_INFO("PSP loading VCN firmware\n"); +	} + +	r = amdgpu_vcn_resume(adev); +	if (r) +		return r; + +	for (j = 0; j < adev->vcn.num_vcn_inst; j++) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; +		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; +		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; +		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; +		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; +		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + +		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); +		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); +		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); +		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); +		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + +		adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; +		adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); + +		ring = &adev->vcn.inst[j].ring_dec; +		ring->use_doorbell = true; +		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; +		sprintf(ring->name, "vcn_dec_%d", j); +		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); +		if (r) +			return r; + +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +			ring = &adev->vcn.inst[j].ring_enc[i]; +			ring->use_doorbell = true; +			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; +			sprintf(ring->name, "vcn_enc_%d.%d", j, i); +			r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); +			if (r) +				return r; +		} + +		ring = &adev->vcn.inst[j].ring_jpeg; +		ring->use_doorbell = true; +		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; +		sprintf(ring->name, "vcn_jpeg_%d", j); +		r = 
amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); +		if (r) +			return r; +	} + +	return 0; +} + +/** + * vcn_v2_5_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_5_sw_fini(void *handle) +{ +	int r; +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	r = amdgpu_vcn_suspend(adev); +	if (r) +		return r; + +	r = amdgpu_vcn_sw_fini(adev); + +	return r; +} + +/** + * vcn_v2_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_5_hw_init(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	struct amdgpu_ring *ring; +	int i, j, r; + +	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		ring = &adev->vcn.inst[j].ring_dec; + +		adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, +						     ring->doorbell_index, j); + +		r = amdgpu_ring_test_ring(ring); +		if (r) { +			ring->sched.ready = false; +			goto done; +		} + +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +			ring = &adev->vcn.inst[j].ring_enc[i]; +			ring->sched.ready = false; +			continue; +			r = amdgpu_ring_test_ring(ring); +			if (r) { +				ring->sched.ready = false; +				goto done; +			} +		} + +		ring = &adev->vcn.inst[j].ring_jpeg; +		r = amdgpu_ring_test_ring(ring); +		if (r) { +			ring->sched.ready = false; +			goto done; +		} +	} +done: +	if (!r) +		DRM_INFO("VCN decode and encode initialized successfully.\n"); + +	return r; +} + +/** + * vcn_v2_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v2_5_hw_fini(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	struct amdgpu_ring *ring; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		ring = &adev->vcn.inst[i].ring_dec; + +		if (RREG32_SOC15(VCN, i, mmUVD_STATUS)) +			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + +		ring->sched.ready = false; + +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +			ring = &adev->vcn.inst[i].ring_enc[i]; +			ring->sched.ready = false; +		} + +		ring = &adev->vcn.inst[i].ring_jpeg; +		ring->sched.ready = false; +	} + +	return 0; +} + +/** + * vcn_v2_5_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v2_5_suspend(void *handle) +{ +	int r; +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	r = vcn_v2_5_hw_fini(adev); +	if (r) +		return r; + +	r = amdgpu_vcn_suspend(adev); + +	return r; +} + +/** + * vcn_v2_5_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v2_5_resume(void *handle) +{ +	int r; +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	r = amdgpu_vcn_resume(adev); +	if (r) +		return r; + +	r = vcn_v2_5_hw_init(adev); + +	return r; +} + +/** + * vcn_v2_5_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) +{ +	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); +	uint32_t offset; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if 
(adev->vcn.harvest_config & (1 << i)) +			continue; +		/* cache window 0: fw */ +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); +			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); +			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); +			offset = 0; +		} else { +			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +				lower_32_bits(adev->vcn.inst[i].gpu_addr)); +			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +				upper_32_bits(adev->vcn.inst[i].gpu_addr)); +			offset = size; +			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, +				AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +		} +		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + +		/* cache window 1: stack */ +		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, +			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); +		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, +			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); +		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); +		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + +		/* cache window 2: context */ +		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, +			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, +			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); +		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); +		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); +	} +} + +/** + * vcn_v2_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +{ +	uint32_t data; +	int ret = 0; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* UVD disable CGC */ +		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); +		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) +			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +		else +			data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; +		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + +		data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); +		data &= ~(UVD_CGC_GATE__SYS_MASK +			| UVD_CGC_GATE__UDEC_MASK +			| UVD_CGC_GATE__MPEG2_MASK +			| UVD_CGC_GATE__REGS_MASK +			| UVD_CGC_GATE__RBC_MASK +			| UVD_CGC_GATE__LMI_MC_MASK +			| UVD_CGC_GATE__LMI_UMC_MASK +			| UVD_CGC_GATE__IDCT_MASK +			| UVD_CGC_GATE__MPRD_MASK +			| UVD_CGC_GATE__MPC_MASK +			| UVD_CGC_GATE__LBSI_MASK +			| UVD_CGC_GATE__LRBBM_MASK +			| UVD_CGC_GATE__UDEC_RE_MASK +			| UVD_CGC_GATE__UDEC_CM_MASK +			| UVD_CGC_GATE__UDEC_IT_MASK +			| UVD_CGC_GATE__UDEC_DB_MASK +			| UVD_CGC_GATE__UDEC_MP_MASK +			| UVD_CGC_GATE__WCB_MASK +			| UVD_CGC_GATE__VCPU_MASK +			| UVD_CGC_GATE__MMSCH_MASK); + +		WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + +		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0,  0xFFFFFFFF, ret); + +		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); +		data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK +			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK +			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK +			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK +			| 
UVD_CGC_CTRL__UDEC_MP_MODE_MASK +			| UVD_CGC_CTRL__SYS_MODE_MASK +			| UVD_CGC_CTRL__UDEC_MODE_MASK +			| UVD_CGC_CTRL__MPEG2_MODE_MASK +			| UVD_CGC_CTRL__REGS_MODE_MASK +			| UVD_CGC_CTRL__RBC_MODE_MASK +			| UVD_CGC_CTRL__LMI_MC_MODE_MASK +			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK +			| UVD_CGC_CTRL__IDCT_MODE_MASK +			| UVD_CGC_CTRL__MPRD_MODE_MASK +			| UVD_CGC_CTRL__MPC_MODE_MASK +			| UVD_CGC_CTRL__LBSI_MODE_MASK +			| UVD_CGC_CTRL__LRBBM_MODE_MASK +			| UVD_CGC_CTRL__WCB_MODE_MASK +			| UVD_CGC_CTRL__VCPU_MODE_MASK +			| UVD_CGC_CTRL__MMSCH_MODE_MASK); +		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + +		/* turn on */ +		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); +		data |= (UVD_SUVD_CGC_GATE__SRE_MASK +			| UVD_SUVD_CGC_GATE__SIT_MASK +			| UVD_SUVD_CGC_GATE__SMP_MASK +			| UVD_SUVD_CGC_GATE__SCM_MASK +			| UVD_SUVD_CGC_GATE__SDB_MASK +			| UVD_SUVD_CGC_GATE__SRE_H264_MASK +			| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK +			| UVD_SUVD_CGC_GATE__SIT_H264_MASK +			| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK +			| UVD_SUVD_CGC_GATE__SCM_H264_MASK +			| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK +			| UVD_SUVD_CGC_GATE__SDB_H264_MASK +			| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK +			| UVD_SUVD_CGC_GATE__SCLR_MASK +			| UVD_SUVD_CGC_GATE__UVD_SC_MASK +			| UVD_SUVD_CGC_GATE__ENT_MASK +			| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK +			| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK +			| UVD_SUVD_CGC_GATE__SITE_MASK +			| UVD_SUVD_CGC_GATE__SRE_VP9_MASK +			| UVD_SUVD_CGC_GATE__SCM_VP9_MASK +			| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK +			| UVD_SUVD_CGC_GATE__SDB_VP9_MASK +			| UVD_SUVD_CGC_GATE__IME_HEVC_MASK); +		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + +		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); +		data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK +			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK +			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK +			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); +		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); +	} +} + +/** + * vcn_v2_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +{ +	uint32_t data = 0; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* enable UVD CGC */ +		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); +		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) +			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +		else +			data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + +		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); +		data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK +			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK +			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK +			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK +			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK +			| UVD_CGC_CTRL__SYS_MODE_MASK +			| UVD_CGC_CTRL__UDEC_MODE_MASK +			| UVD_CGC_CTRL__MPEG2_MODE_MASK +			| UVD_CGC_CTRL__REGS_MODE_MASK +			| UVD_CGC_CTRL__RBC_MODE_MASK +			| UVD_CGC_CTRL__LMI_MC_MODE_MASK +			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK +			| UVD_CGC_CTRL__IDCT_MODE_MASK +			| UVD_CGC_CTRL__MPRD_MODE_MASK +			| UVD_CGC_CTRL__MPC_MODE_MASK +			
| UVD_CGC_CTRL__LBSI_MODE_MASK +			| UVD_CGC_CTRL__LRBBM_MODE_MASK +			| UVD_CGC_CTRL__WCB_MODE_MASK +			| UVD_CGC_CTRL__VCPU_MODE_MASK); +		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + +		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); +		data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK +			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK +			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK +			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK +			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); +		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); +	} +} + +/** + * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ +	struct amdgpu_ring *ring; +	uint32_t tmp; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		ring = &adev->vcn.inst[i].ring_jpeg; +		/* disable anti hang mechanism */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, +			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + +		/* JPEG disable CGC */ +		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); +		tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; +		tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; +		tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; +		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + +		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); +		tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK +			| JPEG_CGC_GATE__JPEG2_DEC_MASK +			| JPEG_CGC_GATE__JMCIF_MASK +			| JPEG_CGC_GATE__JRBBM_MASK); +		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + +		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); +		tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK +			| JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK +			| JPEG_CGC_CTRL__JMCIF_MODE_MASK +			| JPEG_CGC_CTRL__JRBBM_MODE_MASK); +		WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + +		/* MJPEG global tiling registers */ +		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, +			adev->gfx.config.gb_addr_config); +		WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, +			adev->gfx.config.gb_addr_config); + +		/* enable JMI channel */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, +			~UVD_JMI_CNTL__SOFT_RESET_MASK); + +		/* enable System Interrupt for JRBC */ +		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), +			JPEG_SYS_INT_EN__DJRBC_MASK, +			~JPEG_SYS_INT_EN__DJRBC_MASK); + +		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); +		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); +		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, +			lower_32_bits(ring->gpu_addr)); +		WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, +			upper_32_bits(ring->gpu_addr)); +		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); +		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); +		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); +		WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); +		ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); +	} + +	return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ +	uint32_t tmp; +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* reset JMI */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), +			
UVD_JMI_CNTL__SOFT_RESET_MASK, +			~UVD_JMI_CNTL__SOFT_RESET_MASK); + +		tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); +		tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK +			|JPEG_CGC_GATE__JPEG2_DEC_MASK +			|JPEG_CGC_GATE__JMCIF_MASK +			|JPEG_CGC_GATE__JRBBM_MASK); +		WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + +		/* enable anti hang mechanism */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), +			UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, +			~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +	} + +	return 0; +} + +static int vcn_v2_5_start(struct amdgpu_device *adev) +{ +	struct amdgpu_ring *ring; +	uint32_t rb_bufsz, tmp; +	int i, j, k, r; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* disable register anti-hang mechanism */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, +			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + +		/* set uvd status busy */ +		tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; +		WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); +	} + +	/*SW clock gating */ +	vcn_v2_5_disable_clock_gating(adev); + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* enable VCPU clock */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), +			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + +		/* disable master interrupt */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, +			~UVD_MASTINT_EN__VCPU_EN_MASK); + +		/* setup mmUVD_LMI_CTRL */ +		tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); +		tmp &= ~0xff; +		WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| +			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	| +			UVD_LMI_CTRL__MASK_MC_URGENT_MASK | +			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | +			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + +		/* setup mmUVD_MPC_CNTL */ +		tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); +		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; +		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; +		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + +		/* setup UVD_MPC_SET_MUXA0 */ +		WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, +			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | +			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | +			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | +			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + +		/* setup UVD_MPC_SET_MUXB0 */ +		WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, +			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | +			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | +			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | +			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + +		/* setup mmUVD_MPC_SET_MUX */ +		WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, +			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | +			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | +			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); +	} + +	vcn_v2_5_mc_resume(adev); + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* VCN global tiling registers */ +		WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, +			adev->gfx.config.gb_addr_config); +		WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, +			adev->gfx.config.gb_addr_config); + +		/* enable LMI MC and UMC channels */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, +			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + +		/* unblock VCPU register access */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, +			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, +			
~UVD_VCPU_CNTL__BLK_RST_MASK); + +		for (k = 0; k < 10; ++k) { +			uint32_t status; + +			for (j = 0; j < 100; ++j) { +				status = RREG32_SOC15(UVD, i, mmUVD_STATUS); +				if (status & 2) +					break; +				if (amdgpu_emu_mode == 1) +					msleep(500); +				else +					mdelay(10); +			} +			r = 0; +			if (status & 2) +				break; + +			DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); +			WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), +				UVD_VCPU_CNTL__BLK_RST_MASK, +				~UVD_VCPU_CNTL__BLK_RST_MASK); +			mdelay(10); +			WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, +				~UVD_VCPU_CNTL__BLK_RST_MASK); + +			mdelay(10); +			r = -1; +		} + +		if (r) { +			DRM_ERROR("VCN decode not responding, giving up!!!\n"); +			return r; +		} + +		/* enable master interrupt */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), +			UVD_MASTINT_EN__VCPU_EN_MASK, +			~UVD_MASTINT_EN__VCPU_EN_MASK); + +		/* clear the busy bit of VCN_STATUS */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, +			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + +		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + +		ring = &adev->vcn.inst[i].ring_dec; +		/* force RBC into idle state */ +		rb_bufsz = order_base_2(ring->ring_size); +		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); +		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); +		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + +		/* programm the RB_BASE for ring buffer */ +		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, +			lower_32_bits(ring->gpu_addr)); +		WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, +			upper_32_bits(ring->gpu_addr)); + +		/* Initialize the ring buffer's read and write pointers */ +		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + +		ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); +		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, +				lower_32_bits(ring->wptr)); +		ring = &adev->vcn.inst[i].ring_enc[0]; +		WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); +		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + +		ring = &adev->vcn.inst[i].ring_enc[1]; +		WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); +		WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); +		WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); +	} +	r = jpeg_v2_5_start(adev); + +	return r; +} + +static int vcn_v2_5_stop(struct amdgpu_device *adev) +{ +	uint32_t tmp; +	int i, r; + +	r = jpeg_v2_5_stop(adev); +	if (r) +		return r; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		/* wait for vcn idle */ +		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); +		if (r) +			return r; + +		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | +			UVD_LMI_STATUS__READ_CLEAN_MASK | +			UVD_LMI_STATUS__WRITE_CLEAN_MASK | +			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; +		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); +		if (r) +			return r; + +		/* block LMI UMC channel */ +		
tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); +		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; +		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + +		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| +			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; +		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); +		if (r) +			return r; + +		/* block VCPU register access */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), +			UVD_RB_ARB_CTRL__VCPU_DIS_MASK, +			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + +		/* reset VCPU */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), +			UVD_VCPU_CNTL__BLK_RST_MASK, +			~UVD_VCPU_CNTL__BLK_RST_MASK); + +		/* disable VCPU clock */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, +			~(UVD_VCPU_CNTL__CLK_EN_MASK)); + +		/* clear status */ +		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + +		vcn_v2_5_enable_clock_gating(adev); + +		/* enable register anti-hang mechanism */ +		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), +			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, +			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); +	} + +	return 0; +} + +/** + * vcn_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring->use_doorbell) +		return adev->wb.wb[ring->wptr_offs]; +	else +		return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring->use_doorbell) { +		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); +		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); +	} else { +		WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); +	} +} + +static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { +	.type = AMDGPU_RING_TYPE_VCN_DEC, +	.align_mask = 0xf, +	.vmhub = AMDGPU_MMHUB_1, +	.get_rptr = vcn_v2_5_dec_ring_get_rptr, +	.get_wptr = vcn_v2_5_dec_ring_get_wptr, +	.set_wptr = vcn_v2_5_dec_ring_set_wptr, +	.emit_frame_size = +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + +		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ +		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ +		6, +	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ +	.emit_ib = vcn_v2_0_dec_ring_emit_ib, +	.emit_fence = vcn_v2_0_dec_ring_emit_fence, +	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, +	.test_ring = amdgpu_vcn_dec_ring_test_ring, +	.test_ib = amdgpu_vcn_dec_ring_test_ib, +	.insert_nop = vcn_v2_0_dec_ring_insert_nop, +	.insert_start = vcn_v2_0_dec_ring_insert_start, +	.insert_end = vcn_v2_0_dec_ring_insert_end, +	.pad_ib = amdgpu_ring_generic_pad_ib, +	.begin_use = amdgpu_vcn_ring_begin_use, +	.end_use = amdgpu_vcn_ring_end_use, +	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg, +	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * 
vcn_v2_5_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) +		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); +	else +		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v2_5_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { +		if (ring->use_doorbell) +			return adev->wb.wb[ring->wptr_offs]; +		else +			return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); +	} else { +		if (ring->use_doorbell) +			return adev->wb.wb[ring->wptr_offs]; +		else +			return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); +	} +} + +/** + * vcn_v2_5_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { +		if (ring->use_doorbell) { +			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); +			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); +		} else { +			WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); +		} +	} else { +		if (ring->use_doorbell) { +			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); +			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); +		} else { +			WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); +		} +	} +} + +static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { +	.type = AMDGPU_RING_TYPE_VCN_ENC, +	.align_mask = 0x3f, +	.nop = VCN_ENC_CMD_NO_OP, +	.vmhub = AMDGPU_MMHUB_1, +	.get_rptr = vcn_v2_5_enc_ring_get_rptr, +	.get_wptr = vcn_v2_5_enc_ring_get_wptr, +	.set_wptr = vcn_v2_5_enc_ring_set_wptr, +	.emit_frame_size = +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + +		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ +		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ +		1, /* vcn_v2_0_enc_ring_insert_end */ +	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ +	.emit_ib = vcn_v2_0_enc_ring_emit_ib, +	.emit_fence = vcn_v2_0_enc_ring_emit_fence, +	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, +	.test_ring = amdgpu_vcn_enc_ring_test_ring, +	.test_ib = amdgpu_vcn_enc_ring_test_ib, +	.insert_nop = amdgpu_ring_insert_nop, +	.insert_end = vcn_v2_0_enc_ring_insert_end, +	.pad_ib = amdgpu_ring_generic_pad_ib, +	.begin_use = amdgpu_vcn_ring_begin_use, +	.end_use = amdgpu_vcn_ring_end_use, +	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg, +	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_5_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current 
hardware write pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring->use_doorbell) +		return adev->wb.wb[ring->wptr_offs]; +	else +		return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_5_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	if (ring->use_doorbell) { +		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); +		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); +	} else { +		WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +	} +} + +static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { +	.type = AMDGPU_RING_TYPE_VCN_JPEG, +	.align_mask = 0xf, +	.vmhub = AMDGPU_MMHUB_1, +	.get_rptr = vcn_v2_5_jpeg_ring_get_rptr, +	.get_wptr = vcn_v2_5_jpeg_ring_get_wptr, +	.set_wptr = vcn_v2_5_jpeg_ring_set_wptr, +	.emit_frame_size = +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + +		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ +		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ +		8 + 16, +	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ +	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib, +	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence, +	.emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, +	.test_ring = amdgpu_vcn_jpeg_ring_test_ring, +	.test_ib = amdgpu_vcn_jpeg_ring_test_ib, +	.insert_nop = vcn_v2_0_jpeg_ring_nop, +	.insert_start = vcn_v2_0_jpeg_ring_insert_start, +	.insert_end = vcn_v2_0_jpeg_ring_insert_end, +	.pad_ib = amdgpu_ring_generic_pad_ib, +	.begin_use = amdgpu_vcn_ring_begin_use, +	.end_use = amdgpu_vcn_ring_end_use, +	.emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, +	.emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; +		adev->vcn.inst[i].ring_dec.me = i; +		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); +	} +} + +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) +{ +	int i, j; + +	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { +		if (adev->vcn.harvest_config & (1 << j)) +			continue; +		for (i = 0; i < adev->vcn.num_enc_rings; ++i) { +			adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; +			adev->vcn.inst[j].ring_enc[i].me = j; +		} +		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); +	} +} + +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; +		adev->vcn.inst[i].ring_jpeg.me = i; +		DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); +	} +} + +static bool vcn_v2_5_is_idle(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	int i, ret = 1; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); +	} + +	return ret; +} + +static int 
vcn_v2_5_wait_for_idle(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	int i, ret = 0; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, +			UVD_STATUS__IDLE, ret); +		if (ret) +			return ret; +	} + +	return ret; +} + +static int vcn_v2_5_set_clockgating_state(void *handle, +					  enum amd_clockgating_state state) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + +	if (enable) { +		if (!vcn_v2_5_is_idle(handle)) +			return -EBUSY; +		vcn_v2_5_enable_clock_gating(adev); +	} else { +		vcn_v2_5_disable_clock_gating(adev); +	} + +	return 0; +} + +static int vcn_v2_5_set_powergating_state(void *handle, +					  enum amd_powergating_state state) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	int ret; + +	if (state == adev->vcn.cur_state) +		return 0; + +	if (state == AMD_PG_STATE_GATE) +		ret = vcn_v2_5_stop(adev); +	else +		ret = vcn_v2_5_start(adev); + +	if (!ret) +		adev->vcn.cur_state = state; + +	return ret; +} + +static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, +					struct amdgpu_irq_src *source, +					unsigned type, +					enum amdgpu_interrupt_state state) +{ +	return 0; +} + +static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, +				      struct amdgpu_irq_src *source, +				      struct amdgpu_iv_entry *entry) +{ +	uint32_t ip_instance; + +	switch (entry->client_id) { +	case SOC15_IH_CLIENTID_VCN: +		ip_instance = 0; +		break; +	case SOC15_IH_CLIENTID_VCN1: +		ip_instance = 1; +		break; +	default: +		DRM_ERROR("Unhandled client id: %d\n", entry->client_id); +		return 0; +	} + +	DRM_DEBUG("IH: VCN TRAP\n"); + +	switch (entry->src_id) { +	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: +		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); +		break; +	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: +		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); +		break; +	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: +		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); +		break; +	case VCN_2_0__SRCID__JPEG_DECODE: +		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg); +		break; +	default: +		DRM_ERROR("Unhandled interrupt: %d %d\n", +			  entry->src_id, entry->src_data[0]); +		break; +	} + +	return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { +	.set = vcn_v2_5_set_interrupt_state, +	.process = vcn_v2_5_process_interrupt, +}; + +static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) +{ +	int i; + +	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2; +		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; +	} +} + +static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { +	.name = "vcn_v2_5", +	.early_init = vcn_v2_5_early_init, +	.late_init = NULL, +	.sw_init = vcn_v2_5_sw_init, +	.sw_fini = vcn_v2_5_sw_fini, +	.hw_init = vcn_v2_5_hw_init, +	.hw_fini = vcn_v2_5_hw_fini, +	.suspend = vcn_v2_5_suspend, +	.resume = vcn_v2_5_resume, +	.is_idle = vcn_v2_5_is_idle, +	.wait_for_idle = vcn_v2_5_wait_for_idle, +	.check_soft_reset = NULL, +	.pre_soft_reset = NULL, +	.soft_reset = NULL, +	.post_soft_reset = NULL, +	.set_clockgating_state = vcn_v2_5_set_clockgating_state, +	.set_powergating_state = vcn_v2_5_set_powergating_state, +}; + 
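For reference, the doorbell assignment made in vcn_v2_5_sw_init() earlier in this file gives each VCN instance its own eight-doorbell window above the shared vcn_ring0_1 base. A minimal sketch of that layout, assuming the usual two encode rings per instance (the helper below is illustrative only and not part of this patch; its name is made up):

	/*
	 * Illustrative sketch of the VCN 2.5 doorbell layout programmed in
	 * vcn_v2_5_sw_init(): base = adev->doorbell_index.vcn.vcn_ring0_1 << 1,
	 * and instance j owns doorbells base + 8*j .. base + 8*j + 7:
	 *   +0      decode ring
	 *   +1      jpeg ring
	 *   +2 + i  encode ring i
	 */
	static unsigned int vcn25_example_doorbell(unsigned int base,
						   unsigned int inst,
						   unsigned int slot)
	{
		/* slot: 0 = dec, 1 = jpeg, 2 + i = enc[i] */
		return base + 8 * inst + slot;
	}

With two encode rings this occupies slots +0..+3 of each per-instance window, leaving the remaining four doorbells of the window unused.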
+const struct amdgpu_ip_block_version vcn_v2_5_ip_block = +{ +		.type = AMD_IP_BLOCK_TYPE_VCN, +		.major = 2, +		.minor = 5, +		.rev = 0, +		.funcs = &vcn_v2_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h new file mode 100644 index 000000000000..8d9c0800b8e0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -0,0 +1,29 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_5_H__ +#define __VCN_V2_5_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; + +#endif /* __VCN_V2_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 22260e6963b8..9eae3536ddad 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -50,7 +50,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); -	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +	if (amdgpu_sriov_vf(adev)) {  		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {  			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");  			return; @@ -64,7 +64,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,  					   RB_ENABLE, 1); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -80,7 +80,7 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,  					   RB_ENABLE, 1); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -106,7 +106,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); -	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +	if 
(amdgpu_sriov_vf(adev)) {  		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {  			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");  			return; @@ -125,7 +125,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,  					   RB_ENABLE, 0); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -145,7 +145,7 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,  					   RB_ENABLE, 0); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -219,7 +219,7 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih)  static int vega10_ih_irq_init(struct amdgpu_device *adev)  {  	struct amdgpu_ih_ring *ih; -	u32 ih_rb_cntl; +	u32 ih_rb_cntl, ih_chicken;  	int ret = 0;  	u32 tmp; @@ -234,11 +234,17 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  	WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff);  	ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); +	ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);  	ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); +	if (adev->irq.ih.use_bus_addr) { +		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); +	} else { +		ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1); +	}  	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,  				   !!adev->irq.msi_enabled); -	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +	if (amdgpu_sriov_vf(adev)) {  		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {  			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");  			return -ETIMEDOUT; @@ -247,6 +253,11 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);  	} +	if ((adev->asic_type == CHIP_ARCTURUS +		&& adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) +		|| adev->asic_type == CHIP_RENOIR) +		WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); +  	/* set the writeback address whether it's enabled or not */  	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,  		     lower_32_bits(ih->wptr_addr)); @@ -272,7 +283,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  					   WPTR_OVERFLOW_ENABLE, 0);  		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,  					   RB_FULL_DRAIN_ENABLE, 1); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -299,7 +310,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)  		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);  		ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); -		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { +		if (amdgpu_sriov_vf(adev)) {  			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,  						ih_rb_cntl)) {  				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index a8e92638a2e8..bd0580334f83 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -81,6 +81,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)  	adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;  	adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;  	adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; +	adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1; +	adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3; +	adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5; +	adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7;  	adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP;  	adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 0db84386252a..587e33f5dcce 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -50,6 +50,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)  		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));  		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));  		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); +		adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); +		adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i]));  	}  	return 0;  } @@ -85,6 +87,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)  	adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;  	adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;  	adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; +	adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; +	adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3; +	adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5; +	adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7;  	adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP;  	adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6575ddcfcf00..5f8c8786cac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -711,6 +711,12 @@ static int vi_asic_reset(struct amdgpu_device *adev)  	return r;  } +static enum amd_reset_method +vi_asic_reset_method(struct amdgpu_device *adev) +{ +	return AMD_RESET_METHOD_LEGACY; +} +  static u32 vi_get_config_memsize(struct amdgpu_device *adev)  {  	return RREG32(mmCONFIG_MEMSIZE); @@ -1023,6 +1029,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =  	.read_bios_from_rom = &vi_read_bios_from_rom,  	.read_register = &vi_read_register,  	.reset = &vi_asic_reset, +	.reset_method = &vi_asic_reset_method,  	.set_vga_state = &vi_vga_set_state,  	.get_xclk = &vi_get_xclk,  	.set_uvd_clocks = &vi_set_uvd_clocks,  | 
