Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
169 files changed, 5842 insertions, 2138 deletions
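Among other things, this merge adds ASICID_IS_* "kicker" macros to amdgpu.h and uses them in amdgpu_cgs.c (both hunks appear below) to replace the open-coded PCI device/revision checks used to pick SMC firmware. A minimal standalone sketch of that pattern follows; the ASICID_IS_P20 body is copied from the patch, while the main() and its sample IDs are illustrative only and not part of the kernel code:

	/*
	 * Sketch of the kicker-detection pattern. ASICID_IS_P20 matches the
	 * Polaris10 kicker boards by PCI device ID (did) and revision (rid).
	 */
	#include <stdio.h>

	/* polaris10 kickers (macro body as added to amdgpu.h by this merge) */
	#define ASICID_IS_P20(did, rid)	((((did) == 0x67DF) && \
					  (((rid) == 0xE3) || \
					   ((rid) == 0xE4) || \
					   ((rid) == 0xE5) || \
					   ((rid) == 0xE7) || \
					   ((rid) == 0xEF))) || \
					 (((did) == 0x6FDF) && \
					  (((rid) == 0xE7) || \
					   ((rid) == 0xEF) || \
					   ((rid) == 0xFF))))

	int main(void)
	{
		/* Example IDs a Polaris10 kicker could report in PCI config space. */
		unsigned int did = 0x67DF, rid = 0xE7;

		if (ASICID_IS_P20(did, rid))
			printf("kicker: would load amdgpu/polaris10_k_smc.bin\n");
		else
			printf("would load the default Polaris10 SMC firmware\n");
		return 0;
	}

This keeps the board lists in one place, so amdgpu_cgs_get_firmware_info() (see the amdgpu_cgs.c hunks below) shrinks to a single macro test per family instead of repeating the device/revision tables inline.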
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 39976c7b100c..6bf6cfaea3f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -55,7 +55,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \  	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \  	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \  	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ -	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o +	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ +	amdgpu_fw_attestation.o  amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -69,7 +70,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce  amdgpu-y += \  	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \  	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ -	arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o +	arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ +	nbio_v7_2.o dimgrey_cavefish_reg_init.o  # add DF block  amdgpu-y += \ @@ -81,7 +83,7 @@ amdgpu-y += \  	gmc_v7_0.o \  	gmc_v8_0.o \  	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ -	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o +	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o  # add UMC block  amdgpu-y += \ @@ -165,6 +167,11 @@ amdgpu-y += \  	athub_v2_0.o \  	athub_v2_1.o +# add SMUIO block +amdgpu-y += \ +	smuio_v9_0.o \ +	smuio_v11_0.o +  # add amdkfd interfaces  amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 87f095dc385c..5993dd0fdd8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -106,6 +106,7 @@  #include "amdgpu_mmhub.h"  #include "amdgpu_gfxhub.h"  #include "amdgpu_df.h" +#include "amdgpu_smuio.h"  #define MAX_GPU_INSTANCE		16 @@ -193,9 +194,9 @@ extern int sched_policy;  extern bool debug_evictions;  extern bool no_system_mem_limit;  #else -static const int sched_policy = KFD_SCHED_POLICY_HWS; -static const bool debug_evictions; /* = false */ -static const bool no_system_mem_limit; +static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; +static const bool __maybe_unused debug_evictions; /* = false */ +static const bool __maybe_unused no_system_mem_limit;  #endif  extern int amdgpu_tmz; @@ -623,6 +624,8 @@ struct amdgpu_asic_funcs {  	bool (*supports_baco)(struct amdgpu_device *adev);  	/* pre asic_init quirks */  	void (*pre_asic_init)(struct amdgpu_device *adev); +	/* enter/exit umd stable pstate */ +	int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);  };  /* @@ -723,6 +726,45 @@ struct amd_powerplay {  	const struct amd_pm_funcs *pp_funcs;  }; +/* polaris10 kickers */ +#define ASICID_IS_P20(did, rid)		(((did == 0x67DF) && \ +					 ((rid == 0xE3) || \ +					  (rid == 0xE4) || \ +					  (rid == 0xE5) || \ +					  (rid == 0xE7) || \ +					  (rid == 0xEF))) || \ +					 ((did == 0x6FDF) && \ +					 ((rid == 0xE7) || \ +					  (rid == 0xEF) || \ +					  (rid == 0xFF)))) + +#define ASICID_IS_P30(did, rid)		((did == 0x67DF) && \ +					((rid == 0xE1) || \ +					 (rid == 0xF7))) + +/* polaris11 kickers */ +#define ASICID_IS_P21(did, rid)		(((did == 0x67EF) && \ +					 ((rid == 0xE0) || \ +					  (rid == 0xE5))) || \ +					 ((did == 0x67FF) && \ +	
				 ((rid == 0xCF) || \ +					  (rid == 0xEF) || \ +					  (rid == 0xFF)))) + +#define ASICID_IS_P31(did, rid)		((did == 0x67EF) && \ +					((rid == 0xE2))) + +/* polaris12 kickers */ +#define ASICID_IS_P23(did, rid)		(((did == 0x6987) && \ +					 ((rid == 0xC0) || \ +					  (rid == 0xC1) || \ +					  (rid == 0xC3) || \ +					  (rid == 0xC7))) || \ +					 ((did == 0x6981) && \ +					 ((rid == 0x00) || \ +					  (rid == 0x01) || \ +					  (rid == 0x10)))) +  #define AMDGPU_RESET_MAGIC_NUM 64  #define AMDGPU_MAX_DF_PERFMONS 4  struct amdgpu_device { @@ -879,6 +921,9 @@ struct amdgpu_device {  	/* nbio */  	struct amdgpu_nbio		nbio; +	/* smuio */ +	struct amdgpu_smuio		smuio; +  	/* mmhub */  	struct amdgpu_mmhub		mmhub; @@ -979,6 +1024,7 @@ struct amdgpu_device {  	/* enable runtime pm on the device */  	bool                            runpm;  	bool                            in_runpm; +	bool                            has_pr3;  	bool                            pm_sysfs_en;  	bool                            ucode_sysfs_en; @@ -1165,6 +1211,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);  #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))  #define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev))  #define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev)) +#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \ +	((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0)  #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); @@ -1183,6 +1231,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,  					     const u32 *registers,  					     const u32 array_size); +bool amdgpu_device_supports_atpx(struct drm_device *dev);  bool amdgpu_device_supports_boco(struct drm_device *dev);  bool amdgpu_device_supports_baco(struct drm_device *dev);  bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, @@ -1233,6 +1282,8 @@ int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);  void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);  long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,  			     unsigned long arg); +int amdgpu_info_ioctl(struct drm_device *dev, void *data, +		      struct drm_file *filp);  /*   * functions used by amdgpu_encoder.c @@ -1264,9 +1315,11 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);  void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,  		struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev);  #else  static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }  static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } +static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { return false; }  #endif  int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, @@ -1294,19 +1347,6 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev);  #include "amdgpu_object.h" -/* used by df_v3_6.c and amdgpu_pmu.c */ -#define AMDGPU_PMU_ATTR(_name, _object)					\ -static ssize_t								\ -_name##_show(struct device *dev,					\ -			       struct device_attribute *attr,		\ -			       char *page)				\ -{									\ -	BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1);			\ -	return sprintf(page, _object "\n");				\ -}									\ -									\ -static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) -  static inline bool 
amdgpu_is_tmz(struct amdgpu_device *adev)  {         return adev->gmc.tmz_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index d3e51d361179..b8655ff73a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -36,17 +36,17 @@  #include "acp_gfx_if.h" -#define ACP_TILE_ON_MASK                	0x03 -#define ACP_TILE_OFF_MASK               	0x02 -#define ACP_TILE_ON_RETAIN_REG_MASK     	0x1f -#define ACP_TILE_OFF_RETAIN_REG_MASK    	0x20 +#define ACP_TILE_ON_MASK			0x03 +#define ACP_TILE_OFF_MASK			0x02 +#define ACP_TILE_ON_RETAIN_REG_MASK		0x1f +#define ACP_TILE_OFF_RETAIN_REG_MASK		0x20 -#define ACP_TILE_P1_MASK                	0x3e -#define ACP_TILE_P2_MASK                	0x3d -#define ACP_TILE_DSP0_MASK              	0x3b -#define ACP_TILE_DSP1_MASK              	0x37 +#define ACP_TILE_P1_MASK			0x3e +#define ACP_TILE_P2_MASK			0x3d +#define ACP_TILE_DSP0_MASK			0x3b +#define ACP_TILE_DSP1_MASK			0x37 -#define ACP_TILE_DSP2_MASK              	0x2f +#define ACP_TILE_DSP2_MASK			0x2f  #define ACP_DMA_REGS_END			0x146c0  #define ACP_I2S_PLAY_REGS_START			0x14840 @@ -75,8 +75,8 @@  #define mmACP_CONTROL				0x5131  #define mmACP_STATUS				0x5133  #define mmACP_SOFT_RESET			0x5134 -#define ACP_CONTROL__ClkEn_MASK 		0x1 -#define ACP_SOFT_RESET__SoftResetAud_MASK 	0x100 +#define ACP_CONTROL__ClkEn_MASK			0x1 +#define ACP_SOFT_RESET__SoftResetAud_MASK	0x100  #define ACP_SOFT_RESET__SoftResetAudDone_MASK	0x1000000  #define ACP_CLOCK_EN_TIME_OUT_VALUE		0x000000FF  #define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE	0x000000FF @@ -176,7 +176,7 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r)  /**   * acp_hw_init - start and test ACP block   * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer   *   */  static int acp_hw_init(void *handle) @@ -405,7 +405,7 @@ failure:  /**   * acp_hw_fini - stop the hardware block   * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer   *   */  static int acp_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 165b02e267b0..8155c54392c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -27,6 +27,7 @@  #include <linux/power_supply.h>  #include <linux/pm_runtime.h>  #include <acpi/video.h> +#include <acpi/actbl.h>  #include <drm/drm_crtc_helper.h>  #include "amdgpu.h" @@ -894,3 +895,18 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)  	unregister_acpi_notifier(&adev->acpi_nb);  	kfree(adev->atif);  } + +/** + * amdgpu_acpi_is_s0ix_supported + * + * returns true if supported, false if not. + */ +bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) +{ +	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { +		if (adev->flags & AMD_IS_APU) +			return true; +	} + +	return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0544460653b9..db96d69eb45e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -390,23 +390,17 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,  				      struct kfd_local_mem_info *mem_info)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; -	uint64_t address_mask = adev->dev->dma_mask ? 
~*adev->dev->dma_mask : -					     ~((1ULL << 32) - 1); -	resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;  	memset(mem_info, 0, sizeof(*mem_info)); -	if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { -		mem_info->local_mem_size_public = adev->gmc.visible_vram_size; -		mem_info->local_mem_size_private = adev->gmc.real_vram_size - -				adev->gmc.visible_vram_size; -	} else { -		mem_info->local_mem_size_public = 0; -		mem_info->local_mem_size_private = adev->gmc.real_vram_size; -	} + +	mem_info->local_mem_size_public = adev->gmc.visible_vram_size; +	mem_info->local_mem_size_private = adev->gmc.real_vram_size - +						adev->gmc.visible_vram_size; +  	mem_info->vram_width = adev->gmc.vram_width; -	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", -			&adev->gmc.aper_base, &aper_limit, +	pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", +			&adev->gmc.aper_base,  			mem_info->local_mem_size_public,  			mem_info->local_mem_size_private); @@ -648,6 +642,13 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +	/* Temp workaround to fix the soft hang observed in certain compute +	 * applications if GFXOFF is enabled. +	 */ +	if (adev->asic_type == CHIP_SIENNA_CICHLID) { +		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); +		amdgpu_gfx_off_ctrl(adev, idle); +	}  	amdgpu_dpm_switch_power_profile(adev,  					PP_SMC_POWER_PROFILE_COMPUTE,  					!idle); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 1afa8f122e7d..604757a1e440 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -304,4 +304,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {  				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,  	.set_vm_context_page_table_base =  				kgd_gfx_v9_set_vm_context_page_table_base, +	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy  }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 43b18863a8b8..b43e68fc1378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -37,6 +37,7 @@  #include "soc15.h"  #include "soc15d.h"  #include "gfx_v9_0.h" +#include "amdgpu_amdkfd_gfx_v9.h"  enum hqd_dequeue_request_type {  	NO_ACTION = 0, @@ -799,7 +800,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,   *   *  Reading registers referenced above involves programming GRBM appropriately   */ -static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid,  		int *pasid_wave_cnt, int *max_waves_per_cu)  {  	int qidx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index fc8934b86d93..e64deba8900f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -63,3 +63,5 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,  void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,  			uint32_t vmid, uint64_t page_table_base); +void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +		int *pasid_wave_cnt, int *max_waves_per_cu); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5da487b64a66..2d991da2cead 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -239,8 +239,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,  	if (!old)  		return 0; -	new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]), -		      GFP_KERNEL); +	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);  	if (!new)  		return -ENOMEM; @@ -1115,19 +1114,19 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)  void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)  {  	struct amdgpu_device *adev = get_amdgpu_device(kgd); -        struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; +	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;  	if (WARN_ON(!kgd || !vm)) -                return; +		return; -        pr_debug("Releasing process vm %p\n", vm); +	pr_debug("Releasing process vm %p\n", vm); -        /* The original pasid of amdgpu vm has already been -         * released during making a amdgpu vm to a compute vm -         * The current pasid is managed by kfd and will be -         * released on kfd process destroy. Set amdgpu pasid -         * to 0 to avoid duplicate release. -         */ +	/* The original pasid of amdgpu vm has already been +	 * released during making a amdgpu vm to a compute vm +	 * The current pasid is managed by kfd and will be +	 * released on kfd process destroy. Set amdgpu pasid +	 * to 0 to avoid duplicate release. +	 */  	amdgpu_vm_release_compute(adev, avm);  } @@ -1214,7 +1213,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);  	if (ret) { -		pr_debug("Insufficient system memory\n"); +		pr_debug("Insufficient memory\n");  		goto err_reserve_limit;  	} @@ -1288,7 +1287,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(  	struct ttm_validate_buffer *bo_list_entry;  	unsigned int mapped_to_gpu_memory;  	int ret; -	bool is_imported = 0; +	bool is_imported = false;  	mutex_lock(&mem->lock);  	mapped_to_gpu_memory = mem->mapped_to_gpu_memory; @@ -1479,7 +1478,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(  		}  	} -	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) +	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)  		amdgpu_bo_fence(bo,  				&avm->process_info->eviction_fence->base,  				true); @@ -1558,7 +1557,8 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(  	 * required.  	 */  	if (mem->mapped_to_gpu_memory == 0 && -	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) +	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && +	    !mem->bo->tbo.pin_count)  		amdgpu_amdkfd_remove_eviction_fence(mem->bo,  						process_info->eviction_fence); @@ -1694,7 +1694,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,  	INIT_LIST_HEAD(&(*mem)->bo_va_list);  	mutex_init(&(*mem)->lock); -	 +  	(*mem)->alloc_flags =  		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?  		
KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) @@ -2043,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)  	int ret = 0, i;  	struct list_head duplicate_save;  	struct amdgpu_sync sync_obj; +	unsigned long failed_size = 0; +	unsigned long total_size = 0;  	INIT_LIST_HEAD(&duplicate_save);  	INIT_LIST_HEAD(&ctx.list); @@ -2099,10 +2101,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)  		uint32_t domain = mem->domain;  		struct kfd_bo_va_list *bo_va_entry; +		total_size += amdgpu_bo_size(bo); +  		ret = amdgpu_amdkfd_bo_validate(bo, domain, false);  		if (ret) { -			pr_debug("Memory eviction: Validate BOs failed. Try again\n"); -			goto validate_map_fail; +			pr_debug("Memory eviction: Validate BOs failed\n"); +			failed_size += amdgpu_bo_size(bo); +			ret = amdgpu_amdkfd_bo_validate(bo, +						AMDGPU_GEM_DOMAIN_GTT, false); +			if (ret) { +				pr_debug("Memory eviction: Try again\n"); +				goto validate_map_fail; +			}  		}  		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);  		if (ret) { @@ -2122,6 +2132,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)  		}  	} +	if (failed_size) +		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); +  	/* Update page directories */  	ret = process_update_pds(process_info, &sync_obj);  	if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 469352e2d6ec..86add0f4ea4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1401,7 +1401,7 @@ static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_atombios_lookup_voltage_object_v3(ATOM_VOL  {  	u32 size = le16_to_cpu(v3->sHeader.usStructureSize);  	u32 offset = offsetof(ATOM_VOLTAGE_OBJECT_INFO_V3_1, asVoltageObj[0]); -	u8 *start = (u8*)v3; +	u8 *start = (u8 *)v3;  	while (offset < size) {  		ATOM_VOLTAGE_OBJECT_V3 *vo = (ATOM_VOLTAGE_OBJECT_V3 *)(start + offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b4df6460e45a..306077884a67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -70,7 +70,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)  	struct atom_context *ctx = adev->mode_info.atom_context;  	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,  						vram_usagebyfirmware); -	struct vram_usagebyfirmware_v2_1 *	firmware_usage; +	struct vram_usagebyfirmware_v2_1 *firmware_usage;  	uint32_t start_addr, size;  	uint16_t data_offset;  	int usage_bytes = 0; @@ -149,6 +149,10 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,  		case LpDdr4MemType:  			vram_type = AMDGPU_VRAM_TYPE_DDR4;  			break; +		case Ddr5MemType: +		case LpDdr5MemType: +			vram_type = AMDGPU_VRAM_TYPE_DDR5; +			break;  		default:  			vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;  			break; @@ -544,6 +548,7 @@ int amdgpu_mem_train_support(struct amdgpu_device *adev)  		case HW_REV(11, 0, 5):  		case HW_REV(11, 0, 7):  		case HW_REV(11, 0, 11): +		case HW_REV(11, 0, 12):  			ret = 1;  			break;  		default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 78ac6dbe70d8..f1a050379190 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -352,17 +352,10 @@ static int 
amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,  				break;  			case CHIP_POLARIS11:  				if (type == CGS_UCODE_ID_SMU) { -					if (((adev->pdev->device == 0x67ef) && -					     ((adev->pdev->revision == 0xe0) || -					      (adev->pdev->revision == 0xe5))) || -					    ((adev->pdev->device == 0x67ff) && -					     ((adev->pdev->revision == 0xcf) || -					      (adev->pdev->revision == 0xef) || -					      (adev->pdev->revision == 0xff)))) { +					if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {  						info->is_kicker = true;  						strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); -					} else if ((adev->pdev->device == 0x67ef) && -						   (adev->pdev->revision == 0xe2)) { +					} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {  						info->is_kicker = true;  						strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");  					} else { @@ -374,21 +367,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,  				break;  			case CHIP_POLARIS10:  				if (type == CGS_UCODE_ID_SMU) { -					if (((adev->pdev->device == 0x67df) && -					     ((adev->pdev->revision == 0xe0) || -					      (adev->pdev->revision == 0xe3) || -					      (adev->pdev->revision == 0xe4) || -					      (adev->pdev->revision == 0xe5) || -					      (adev->pdev->revision == 0xe7) || -					      (adev->pdev->revision == 0xef))) || -					    ((adev->pdev->device == 0x6fdf) && -					     ((adev->pdev->revision == 0xef) || -					      (adev->pdev->revision == 0xff)))) { +					if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) {  						info->is_kicker = true;  						strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); -					} else if ((adev->pdev->device == 0x67df) && -						   ((adev->pdev->revision == 0xe1) || -						    (adev->pdev->revision == 0xf7))) { +					} else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) {  						info->is_kicker = true;  						strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin");  					} else { @@ -399,13 +381,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,  				}  				break;  			case CHIP_POLARIS12: -				if (((adev->pdev->device == 0x6987) && -				     ((adev->pdev->revision == 0xc0) || -				      (adev->pdev->revision == 0xc3))) || -				    ((adev->pdev->device == 0x6981) && -				     ((adev->pdev->revision == 0x00) || -				      (adev->pdev->revision == 0x01) || -				      (adev->pdev->revision == 0x10)))) { +				if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) {  					info->is_kicker = true;  					strcpy(fw_name, "amdgpu/polaris12_k_smc.bin");  				} else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 65d1b23d7e74..b9c11c2b2885 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -1414,10 +1414,12 @@ out:  		pm_runtime_put_autosuspend(connector->dev->dev);  	} -	drm_dp_set_subconnector_property(&amdgpu_connector->base, -					 ret, -					 amdgpu_dig_connector->dpcd, -					 amdgpu_dig_connector->downstream_ports); +	if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort || +	    connector->connector_type == DRM_MODE_CONNECTOR_eDP) +		drm_dp_set_subconnector_property(&amdgpu_connector->base, +						 ret, +						 amdgpu_dig_connector->dpcd, +						 amdgpu_dig_connector->downstream_ports);  	return ret;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 12598a4b5c78..594a0108e90f 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -326,7 +326,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,  	increment_us = time_us - adev->mm_stats.last_update_us;  	adev->mm_stats.last_update_us = time_us;  	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, -                                      us_upper_bound); +				      us_upper_bound);  	/* This prevents the short period of low performance when the VRAM  	 * usage is low and the driver is in debt or doesn't have enough @@ -404,13 +404,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,  	struct ttm_operation_ctx ctx = {  		.interruptible = true,  		.no_wait_gpu = false, -		.resv = bo->tbo.base.resv, -		.flags = 0 +		.resv = bo->tbo.base.resv  	};  	uint32_t domain;  	int r; -	if (bo->pin_count) +	if (bo->tbo.pin_count)  		return 0;  	/* Don't move this buffer if we have depleted our allowance @@ -677,6 +676,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)   * cs_parser_fini() - clean parser states   * @parser:	parser structure holding parsing context.   * @error:	error number + * @backoff:	indicator to backoff the reservation   *   * If error is set than unvalidate buffer, otherwise just free memory   * used by parsing context. @@ -1461,7 +1461,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,  		dma_fence_put(fence);  		if (r)  			return r; -		r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle); +		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);  		drm_syncobj_put(syncobj);  		return r; @@ -1645,6 +1645,7 @@ err_free_fences:   * @parser: command submission parser context   * @addr: VM address   * @bo: resulting BO of the mapping found + * @map: Placeholder to return found BO mapping   *   * Search the buffer objects in the command submission context for a certain   * virtual memory address. 
Returns allocation structure when found, NULL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 08047bc4d588..da21e60bb827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -37,10 +37,9 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)  int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo,  				u32 domain, uint32_t size)  { -	int r;  	void *ptr; -	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, +	amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,  				domain, bo,  				NULL, &ptr);  	if (!*bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c80d8339f58c..0350205c4897 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -100,7 +100,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,  	enum drm_sched_priority priority;  	int r; -	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), +	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),  			 GFP_KERNEL);  	if (!entity)  		return  -ENOMEM; @@ -450,7 +450,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)  void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,  			  struct drm_sched_entity *entity, -			  struct dma_fence *fence, uint64_t* handle) +			  struct dma_fence *fence, uint64_t *handle)  {  	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);  	uint64_t seq = centity->sequence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2d125b8b15ee..a6667a2ca0db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -35,6 +35,7 @@  #include "amdgpu_dm_debugfs.h"  #include "amdgpu_ras.h"  #include "amdgpu_rap.h" +#include "amdgpu_fw_attestation.h"  /**   * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -169,14 +170,14 @@ static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)   *   * Bit 62:  Indicates a GRBM bank switch is needed   * Bit 61:  Indicates a SRBM bank switch is needed (implies bit 62 is - * 			zero) + * 	    zero)   * Bits 24..33: The SE or ME selector if needed   * Bits 34..43: The SH (or SA) or PIPE selector if needed   * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed   *   * Bit 23:  Indicates that the PM power gating lock should be held - * 			This is necessary to read registers that might be - * 			unreliable during a power gating transistion. + * 	    This is necessary to read registers that might be + * 	    unreliable during a power gating transistion.   *   * The lower bits are the BYTE offset of the register to read.  
This   * allows reading multiple registers in a single call and having @@ -299,7 +300,7 @@ end:  	return result;  } -/** +/*   * amdgpu_debugfs_regs_read - Callback for reading MMIO registers   */  static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, @@ -308,7 +309,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,  	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);  } -/** +/*   * amdgpu_debugfs_regs_write - Callback for writing MMIO registers   */  static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, @@ -864,7 +865,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,  {  	struct amdgpu_device *adev = f->f_inode->i_private;  	int r, x; -	ssize_t result=0; +	ssize_t result = 0;  	uint32_t offset, se, sh, cu, wave, simd, data[32];  	if (size & 3 || *pos & 3) @@ -1210,7 +1211,7 @@ static const char *debugfs_regs_names[] = {  /**   * amdgpu_debugfs_regs_init -	Initialize debugfs entries that provide - * 								register access. + * 				register access.   *   * @adev: The device to attach the debugfs entries to   */ @@ -1319,6 +1320,7 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)  	struct drm_info_node *node = (struct drm_info_node *)m->private;  	struct drm_device *dev = node->minor->dev;  	struct amdgpu_device *adev = drm_to_adev(dev); +	struct ttm_resource_manager *man;  	int r;  	r = pm_runtime_get_sync(dev->dev); @@ -1327,7 +1329,9 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)  		return r;  	} -	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); +	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); +	r = ttm_resource_manager_evict_all(&adev->mman.bdev, man); +	seq_printf(m, "(%d)\n", r);  	pm_runtime_mark_last_busy(dev->dev);  	pm_runtime_put_autosuspend(dev->dev); @@ -1335,11 +1339,41 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)  	return 0;  } +static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data) +{ +	struct drm_info_node *node = (struct drm_info_node *)m->private; +	struct drm_device *dev = node->minor->dev; +	struct drm_file *file; +	int r; + +	r = mutex_lock_interruptible(&dev->filelist_mutex); +	if (r) +		return r; + +	list_for_each_entry(file, &dev->filelist, lhead) { +		struct amdgpu_fpriv *fpriv = file->driver_priv; +		struct amdgpu_vm *vm = &fpriv->vm; + +		seq_printf(m, "pid:%d\tProcess:%s ----------\n", +				vm->task_info.pid, vm->task_info.process_name); +		r = amdgpu_bo_reserve(vm->root.base.bo, true); +		if (r) +			break; +		amdgpu_debugfs_vm_bo_info(vm, m); +		amdgpu_bo_unreserve(vm->root.base.bo); +	} + +	mutex_unlock(&dev->filelist_mutex); + +	return r; +} +  static const struct drm_info_list amdgpu_debugfs_list[] = {  	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},  	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},  	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},  	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, +	{"amdgpu_vm_info", &amdgpu_debugfs_vm_info},  };  static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, @@ -1635,6 +1669,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)  	amdgpu_rap_debugfs_init(adev); +	amdgpu_fw_attestation_debugfs_init(adev); +  	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,  					ARRAY_SIZE(amdgpu_debugfs_list));  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 37da3537ba2e..1cb7d73f7317 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,6 +80,8 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");  MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin");  #define AMDGPU_RESUME_MS		2000 @@ -114,6 +116,8 @@ const char *amdgpu_asic_name[] = {  	"NAVI12",  	"SIENNA_CICHLID",  	"NAVY_FLOUNDER", +	"VANGOGH", +	"DIMGREY_CAVEFISH",  	"LAST",  }; @@ -208,14 +212,14 @@ static DEVICE_ATTR(serial_number, S_IRUGO,  		amdgpu_device_get_serial_number, NULL);  /** - * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control + * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control   *   * @dev: drm_device pointer   *   * Returns true if the device is a dGPU with HG/PX power control,   * otherwise return false.   */ -bool amdgpu_device_supports_boco(struct drm_device *dev) +bool amdgpu_device_supports_atpx(struct drm_device *dev)  {  	struct amdgpu_device *adev = drm_to_adev(dev); @@ -225,6 +229,23 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)  }  /** + * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources + * + * @dev: drm_device pointer + * + * Returns true if the device is a dGPU with HG/PX power control, + * otherwise return false. + */ +bool amdgpu_device_supports_boco(struct drm_device *dev) +{ +	struct amdgpu_device *adev = drm_to_adev(dev); + +	if (adev->has_pr3) +		return true; +	return false; +} + +/**   * amdgpu_device_supports_baco - Does the device support BACO   *   * @dev: drm_device pointer @@ -239,9 +260,11 @@ bool amdgpu_device_supports_baco(struct drm_device *dev)  	return amdgpu_asic_supports_baco(adev);  } +/* + * VRAM access helper functions + */ +  /** - * VRAM access helper functions. - *   * amdgpu_device_vram_access - read/write a buffer in vram   *   * @adev: amdgpu_device pointer @@ -579,6 +602,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)   * @adev: amdgpu_device pointer   * @pcie_index: mmio register offset   * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from   *   * Returns the value of indirect register @reg_addr   */ @@ -609,6 +633,7 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,   * @adev: amdgpu_device pointer   * @pcie_index: mmio register offset   * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from   *   * Returns the value of indirect register @reg_addr   */ @@ -705,7 +730,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,  /**   * amdgpu_invalid_rreg - dummy reg read function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @reg: offset of register   *   * Dummy register read function.  
Used for register blocks @@ -722,7 +747,7 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)  /**   * amdgpu_invalid_wreg - dummy reg write function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @reg: offset of register   * @v: value to write to the register   * @@ -739,7 +764,7 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32  /**   * amdgpu_invalid_rreg64 - dummy 64 bit reg read function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @reg: offset of register   *   * Dummy register read function.  Used for register blocks @@ -756,7 +781,7 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)  /**   * amdgpu_invalid_wreg64 - dummy reg write function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @reg: offset of register   * @v: value to write to the register   * @@ -773,7 +798,7 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint  /**   * amdgpu_block_invalid_rreg - dummy reg read function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @block: offset of instance   * @reg: offset of register   * @@ -793,7 +818,7 @@ static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,  /**   * amdgpu_block_invalid_wreg - dummy reg write function   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @block: offset of instance   * @reg: offset of register   * @v: value to write to the register @@ -813,7 +838,7 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,  /**   * amdgpu_device_asic_init - Wrapper for atom asic_init   * - * @dev: drm_device pointer + * @adev: amdgpu_device pointer   *   * Does any asic specific work and then calls atom asic init.   */ @@ -827,7 +852,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)  /**   * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   *   * Allocates a scratch page of VRAM for use by various things in the   * driver. @@ -844,7 +869,7 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)  /**   * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   *   * Frees the VRAM scratch page.   
*/ @@ -1370,13 +1395,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)  	amdgpu_gmc_tmz_set(adev); -	if (amdgpu_num_kcq == -1) { -		amdgpu_num_kcq = 8; -	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { -		amdgpu_num_kcq = 8; -		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); -	} -  	amdgpu_gmc_noretry_set(adev);  	return 0; @@ -1397,7 +1415,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,  	struct drm_device *dev = pci_get_drvdata(pdev);  	int r; -	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF) +	if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)  		return;  	if (state == VGA_SWITCHEROO_ON) { @@ -1783,6 +1801,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)  	case CHIP_VEGA20:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  	default:  		return 0;  	case CHIP_VEGA10: @@ -1803,7 +1822,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)  		chip_name = "arcturus";  		break;  	case CHIP_RENOIR: -		chip_name = "renoir"; +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			chip_name = "renoir"; +		else +			chip_name = "green_sardine";  		break;  	case CHIP_NAVI10:  		chip_name = "navi10"; @@ -1814,6 +1836,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)  	case CHIP_NAVI12:  		chip_name = "navi12";  		break; +	case CHIP_VANGOGH: +		chip_name = "vangogh"; +		break;  	}  	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1988,7 +2013,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  	case  CHIP_NAVI12:  	case  CHIP_SIENNA_CICHLID:  	case  CHIP_NAVY_FLOUNDER: -		adev->family = AMDGPU_FAMILY_NV; +	case  CHIP_DIMGREY_CAVEFISH: +	case CHIP_VANGOGH: +		if (adev->asic_type == CHIP_VANGOGH) +			adev->family = AMDGPU_FAMILY_VGH; +		else +			adev->family = AMDGPU_FAMILY_NV;  		r = nv_set_ip_blocks(adev);  		if (r) @@ -2637,8 +2667,10 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)  {  	int i, r; -	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); -	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); +	if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) { +		amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); +		amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); +	}  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {  		if (!adev->ip_blocks[i].status.valid) @@ -2993,10 +3025,10 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)  	case CHIP_NAVI14:  	case CHIP_NAVI12:  	case CHIP_RENOIR: -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN3_0)  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH: +	case CHIP_VANGOGH:  #endif  		return amdgpu_dc != 0;  #endif @@ -3011,7 +3043,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)  /**   * amdgpu_device_has_dc_support - check if dc is supported   * - * @adev: amdgpu_device_pointer + * @adev: amdgpu_device pointer   *   * Returns true for supported, false for not supported   */ @@ -3162,7 +3194,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	struct drm_device *ddev = adev_to_drm(adev);  	struct pci_dev *pdev = adev->pdev;  	int r, i; -	bool boco = false; +	bool atpx = false;  	u32 max_MBps;  	adev->shutdown = false; @@ -3331,17 +3363,18 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	/* if we have > 1 VGA cards, then disable the 
amdgpu VGA resources */  	/* this will fail for cards that aren't VGA class devices, just  	 * ignore it */ -	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); +	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) +		vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); -	if (amdgpu_device_supports_boco(ddev)) -		boco = true; +	if (amdgpu_device_supports_atpx(ddev)) +		atpx = true;  	if (amdgpu_has_atpx() &&  	    (amdgpu_is_atpx_hybrid() ||  	     amdgpu_has_atpx_dgpu_power_cntl()) &&  	    !pci_is_thunderbolt_attached(adev->pdev))  		vga_switcheroo_register_client(adev->pdev, -					       &amdgpu_switcheroo_ops, boco); -	if (boco) +					       &amdgpu_switcheroo_ops, atpx); +	if (atpx)  		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);  	if (amdgpu_emu_mode == 1) { @@ -3524,7 +3557,7 @@ fence_driver_init:  failed:  	amdgpu_vf_error_trans_all(adev); -	if (boco) +	if (atpx)  		vga_switcheroo_fini_domain_pm_ops(adev->dev);  failed_unmap: @@ -3588,9 +3621,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)  	     amdgpu_has_atpx_dgpu_power_cntl()) &&  	    !pci_is_thunderbolt_attached(adev->pdev))  		vga_switcheroo_unregister_client(adev->pdev); -	if (amdgpu_device_supports_boco(adev_to_drm(adev))) +	if (amdgpu_device_supports_atpx(adev_to_drm(adev)))  		vga_switcheroo_fini_domain_pm_ops(adev->dev); -	vga_client_register(adev->pdev, NULL, NULL, NULL); +	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) +		vga_client_register(adev->pdev, NULL, NULL, NULL);  	if (adev->rio_mem)  		pci_iounmap(adev->pdev, adev->rio_mem);  	adev->rio_mem = NULL; @@ -3693,8 +3727,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)  	amdgpu_fence_driver_suspend(adev); -	r = amdgpu_device_ip_suspend_phase2(adev); - +	if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) +		r = amdgpu_device_ip_suspend_phase2(adev); +	else +		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);  	/* evict remaining vram memory  	 * This second call to evict vram is to evict the gart page table  	 * using the CPU. @@ -3725,6 +3761,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)  		return 0; +	if (amdgpu_acpi_is_s0ix_supported(adev)) +		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry); +  	/* post card */  	if (amdgpu_device_need_post(adev)) {  		r = amdgpu_device_asic_init(adev); @@ -4045,7 +4084,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)  /**   * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @from_hypervisor: request from hypervisor   *   * do VF FLR and reinitialize Asic @@ -4100,7 +4139,7 @@ error:  /**   * amdgpu_device_has_job_running - check if there is any job in mirror list   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   *   * check if there is any job in mirror list   */ @@ -4128,7 +4167,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)  /**   * amdgpu_device_should_recover_gpu - check if we should try GPU recovery   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   *   * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover   * a hung GPU. 
@@ -4477,7 +4516,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)  /**   * amdgpu_device_gpu_recover - reset the asic and recover scheduler   * - * @adev: amdgpu device pointer + * @adev: amdgpu_device pointer   * @job: which job trigger hang   *   * Attempt to reset the GPU if it has hung (all asics). @@ -4497,7 +4536,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  	bool need_emergency_restart = false;  	bool audio_suspended = false; -	/** +	/*  	 * Special case: RAS triggered and full reset isn't supported  	 */  	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); @@ -4846,7 +4885,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev)  	if (!amdgpu_device_supports_baco(adev_to_drm(adev)))  		return -ENOTSUPP; -	if (ras && ras->supported) +	if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)  		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);  	return amdgpu_dpm_baco_enter(adev); @@ -4865,7 +4904,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)  	if (ret)  		return ret; -	if (ras && ras->supported) +	if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)  		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);  	return 0; @@ -5030,8 +5069,7 @@ out:   * @pdev: pointer to PCI device   *   * Called when the error recovery driver tells us that its - * OK to resume normal operation. Use completion to allow - * halted scsi ops to resume. + * OK to resume normal operation.   */  void amdgpu_pci_resume(struct pci_dev *pdev)  { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 373cdebe0e2f..52488bb45112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -44,11 +44,11 @@ struct amdgpu_df_funcs {  	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,  					    bool enable);  	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, -					 int is_add); +					 int counter_idx, int is_add);  	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, -					 int is_remove); +					 int counter_idx, int is_remove);  	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, -					 uint64_t *count); +					 int counter_idx, uint64_t *count);  	uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);  	void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,  			 uint32_t ficadl_val, uint32_t ficadh_val); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index bfb95143ba5e..b2dbcb4df020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -128,6 +128,7 @@ static int hw_id_map[MAX_HWIP] = {  	[NBIF_HWIP]	= NBIF_HWID,  	[THM_HWIP]	= THM_HWID,  	[CLK_HWIP]	= CLKA_HWID, +	[UMC_HWIP]	= UMC_HWID,  };  static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7cc7af2a6822..f764803c53a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -38,6 +38,7 @@  #include <drm/drm_edid.h>  #include <drm/drm_gem_framebuffer_helper.h>  #include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h>  static void amdgpu_display_flip_callback(struct dma_fence *f, @@ -132,10 +133,7 @@ static void amdgpu_display_unpin_work_func(struct 
work_struct *__work)  	/* unpin of the old buffer */  	r = amdgpu_bo_reserve(work->old_abo, true);  	if (likely(r == 0)) { -		r = amdgpu_bo_unpin(work->old_abo); -		if (unlikely(r != 0)) { -			DRM_ERROR("failed to unpin buffer after flip\n"); -		} +		amdgpu_bo_unpin(work->old_abo);  		amdgpu_bo_unreserve(work->old_abo);  	} else  		DRM_ERROR("failed to reserve buffer after flip\n"); @@ -249,8 +247,7 @@ pflip_cleanup:  	}  unpin:  	if (!adev->enable_virtual_display) -		if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) -			DRM_ERROR("failed to unpin new abo in error path\n"); +		amdgpu_bo_unpin(new_abo);  unreserve:  	amdgpu_bo_unreserve(new_abo); @@ -444,10 +441,6 @@ void amdgpu_display_print_display_setup(struct drm_device *dev)  	drm_connector_list_iter_end(&iter);  } -/** - * amdgpu_display_ddc_probe - * - */  bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,  			      bool use_aux)  { @@ -512,7 +505,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,  	 * to avoid hang caused by placement of scanout BO in GTT on certain  	 * APUs. So force the BO placement to VRAM in case this architecture  	 * will not allow USWC mappings. -	 * Also, don't allow GTT domain if the BO doens't have USWC falg set. +	 * Also, don't allow GTT domain if the BO doesn't have USWC flag set.  	 */  	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&  	    amdgpu_bo_support_uswc(bo_flags) && @@ -528,6 +521,11 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,  			    (adev->apu_flags & AMD_APU_IS_PICASSO))  				domain |= AMDGPU_GEM_DOMAIN_GTT;  			break; +		case CHIP_RENOIR: +		case CHIP_VANGOGH: +			domain |= AMDGPU_GEM_DOMAIN_GTT; +			break; +  		default:  			break;  		} @@ -537,20 +535,390 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,  	return domain;  } +static const struct drm_format_info dcc_formats[] = { +	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	 { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	   .has_alpha = true, }, +	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 2, +	  .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 2, +	  .cpp = { 2, 0, }, .block_w = {1, 1, 1}, 
.block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info dcc_retile_formats[] = { +	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	 { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	   .has_alpha = true, }, +	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +	{ .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 3, +	  .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, +	  .has_alpha = true, }, +	{ .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 3, +	  .cpp = { 2, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info * +lookup_format_info(const struct drm_format_info formats[], +		  int num_formats, u32 format) +{ +	int i; + +	for (i = 0; i < num_formats; i++) { +		if (formats[i].format == format) +			return &formats[i]; +	} + +	return NULL; +} + +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier) +{ +	if (!IS_AMD_FMT_MOD(modifier)) +		return NULL; + +	if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) +		return lookup_format_info(dcc_retile_formats, +					  ARRAY_SIZE(dcc_retile_formats), +					  format); + +	if (AMD_FMT_MOD_GET(DCC, modifier)) +		return lookup_format_info(dcc_formats, ARRAY_SIZE(dcc_formats), +					  format); + +	/* returning NULL will cause the default format structs to be used. */ +	return NULL; +} + + +/* + * Tries to extract the renderable DCC offset from the opaque metadata attached + * to the buffer. + */ +static int +extract_render_dcc_offset(struct amdgpu_device *adev, +			  struct drm_gem_object *obj, +			  uint64_t *offset) +{ +	struct amdgpu_bo *rbo; +	int r = 0; +	uint32_t metadata[10]; /* Something that fits a descriptor + header. */ +	uint32_t size; + +	rbo = gem_to_amdgpu_bo(obj); +	r = amdgpu_bo_reserve(rbo, false); + +	if (unlikely(r)) { +		/* Don't show error message when returning -ERESTARTSYS */ +		if (r != -ERESTARTSYS) +			DRM_ERROR("Unable to reserve buffer: %d\n", r); +		return r; +	} + +	r = amdgpu_bo_get_metadata(rbo, metadata, sizeof(metadata), &size, NULL); +	amdgpu_bo_unreserve(rbo); + +	if (r) +		return r; + +	/* +	 * The first word is the metadata version, and we need space for at least +	 * the version + pci vendor+device id + 8 words for a descriptor. 
+	 */ +	if (size < 40  || metadata[0] != 1) +		return -EINVAL; + +	if (adev->family >= AMDGPU_FAMILY_NV) { +		/* resource word 6/7 META_DATA_ADDRESS{_LO} */ +		*offset = ((u64)metadata[9] << 16u) | +			  ((metadata[8] & 0xFF000000u) >> 16); +	} else { +		/* resource word 5/7 META_DATA_ADDRESS */ +		*offset = ((u64)metadata[9] << 8u) | +			  ((u64)(metadata[7] & 0x1FE0000u) << 23); +	} + +	return 0; +} + +static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) +{ +	struct amdgpu_device *adev = drm_to_adev(afb->base.dev); +	uint64_t modifier = 0; + +	if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) { +		modifier = DRM_FORMAT_MOD_LINEAR; +	} else { +		int swizzle = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE); +		bool has_xor = swizzle >= 16; +		int block_size_bits; +		int version; +		int pipe_xor_bits = 0; +		int bank_xor_bits = 0; +		int packers = 0; +		int rb = 0; +		int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); +		uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); + +		switch (swizzle >> 2) { +		case 0: /* 256B */ +			block_size_bits = 8; +			break; +		case 1: /* 4KiB */ +		case 5: /* 4KiB _X */ +			block_size_bits = 12; +			break; +		case 2: /* 64KiB */ +		case 4: /* 64 KiB _T */ +		case 6: /* 64 KiB _X */ +			block_size_bits = 16; +			break; +		default: +			/* RESERVED or VAR */ +			return -EINVAL; +		} + +		if (adev->asic_type >= CHIP_SIENNA_CICHLID) +			version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; +		else if (adev->family == AMDGPU_FAMILY_NV) +			version = AMD_FMT_MOD_TILE_VER_GFX10; +		else +			version = AMD_FMT_MOD_TILE_VER_GFX9; + +		switch (swizzle & 3) { +		case 0: /* Z microtiling */ +			return -EINVAL; +		case 1: /* S microtiling */ +			if (!has_xor) +				version = AMD_FMT_MOD_TILE_VER_GFX9; +			break; +		case 2: +			if (!has_xor && afb->base.format->cpp[0] != 4) +				version = AMD_FMT_MOD_TILE_VER_GFX9; +			break; +		case 3: +			break; +		} + +		if (has_xor) { +			switch (version) { +			case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: +				pipe_xor_bits = min(block_size_bits - 8, pipes); +				packers = min(block_size_bits - 8 - pipe_xor_bits, +					      ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs)); +				break; +			case AMD_FMT_MOD_TILE_VER_GFX10: +				pipe_xor_bits = min(block_size_bits - 8, pipes); +				break; +			case AMD_FMT_MOD_TILE_VER_GFX9: +				rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + +				     ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); +				pipe_xor_bits = min(block_size_bits - 8, pipes + +						    ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); +				bank_xor_bits = min(block_size_bits - 8 - pipe_xor_bits, +						    ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); +				break; +			} +		} + +		modifier = AMD_FMT_MOD | +			   AMD_FMT_MOD_SET(TILE, AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) | +			   AMD_FMT_MOD_SET(TILE_VERSION, version) | +			   AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | +			   AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | +			   AMD_FMT_MOD_SET(PACKERS, packers); + +		if (dcc_offset != 0) { +			bool dcc_i64b = AMDGPU_TILING_GET(afb->tiling_flags, DCC_INDEPENDENT_64B) != 0; +			bool dcc_i128b = version >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; +			const struct drm_format_info *format_info; +			u64 render_dcc_offset; + +			/* Enable constant encode on RAVEN2 and later. 
*/ +			bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN || +						   (adev->asic_type == CHIP_RAVEN && +						    adev->external_rev_id >= 0x81); + +			int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B : +					      dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B : +					      AMD_FMT_MOD_DCC_BLOCK_256B; + +			modifier |= AMD_FMT_MOD_SET(DCC, 1) | +				    AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, dcc_constant_encode) | +				    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, dcc_i64b) | +				    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, dcc_i128b) | +				    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_cblock_size); + +			afb->base.offsets[1] = dcc_offset * 256 + afb->base.offsets[0]; +			afb->base.pitches[1] = +				AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1; + +			/* +			 * If the userspace driver uses retiling the tiling flags do not contain +			 * info on the renderable DCC buffer. Luckily the opaque metadata contains +			 * the info so we can try to extract it. The kernel does not use this info +			 * but we should convert it to a modifier plane for getfb2, so the +			 * userspace driver that gets it doesn't have to juggle around another DCC +			 * plane internally. +			 */ +			if (extract_render_dcc_offset(adev, afb->base.obj[0], +						      &render_dcc_offset) == 0 && +			    render_dcc_offset != 0 && +			    render_dcc_offset != afb->base.offsets[1] && +			    render_dcc_offset < UINT_MAX) { +				uint32_t dcc_block_bits;  /* of base surface data */ + +				modifier |= AMD_FMT_MOD_SET(DCC_RETILE, 1); +				afb->base.offsets[2] = render_dcc_offset; + +				if (adev->family >= AMDGPU_FAMILY_NV) { +					int extra_pipe = 0; + +					if (adev->asic_type >= CHIP_SIENNA_CICHLID && +					    pipes == packers && pipes > 1) +						extra_pipe = 1; + +					dcc_block_bits = max(20, 16 + pipes + extra_pipe); +				} else { +					modifier |= AMD_FMT_MOD_SET(RB, rb) | +						    AMD_FMT_MOD_SET(PIPE, pipes); +					dcc_block_bits = max(20, 18 + rb); +				} + +				dcc_block_bits -= ilog2(afb->base.format->cpp[0]); +				afb->base.pitches[2] = ALIGN(afb->base.width, +							     1u << ((dcc_block_bits + 1) / 2)); +			} +			format_info = amdgpu_lookup_format_info(afb->base.format->format, +								modifier); +			if (!format_info) +				return -EINVAL; + +			afb->base.format = format_info; +		} +	} + +	afb->base.modifier = modifier; +	afb->base.flags |= DRM_MODE_FB_MODIFIERS; +	return 0; +} + +static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, +				      uint64_t *tiling_flags, bool *tmz_surface) +{ +	struct amdgpu_bo *rbo; +	int r; + +	if (!amdgpu_fb) { +		*tiling_flags = 0; +		*tmz_surface = false; +		return 0; +	} + +	rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); +	r = amdgpu_bo_reserve(rbo, false); + +	if (unlikely(r)) { +		/* Don't show error message when returning -ERESTARTSYS */ +		if (r != -ERESTARTSYS) +			DRM_ERROR("Unable to reserve buffer: %d\n", r); +		return r; +	} + +	if (tiling_flags) +		amdgpu_bo_get_tiling_flags(rbo, tiling_flags); + +	if (tmz_surface) +		*tmz_surface = amdgpu_bo_encrypted(rbo); + +	amdgpu_bo_unreserve(rbo); + +	return r; +} +  int amdgpu_display_framebuffer_init(struct drm_device *dev,  				    struct amdgpu_framebuffer *rfb,  				    const struct drm_mode_fb_cmd2 *mode_cmd,  				    struct drm_gem_object *obj)  { -	int ret; +	int ret, i;  	rfb->base.obj[0] = obj;  	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);  	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); -	if (ret) { -		rfb->base.obj[0] = NULL; -		return 
ret; +	if (ret) +		goto fail; + +	/* +	 * This needs to happen before modifier conversion as that might change +	 * the number of planes. +	 */ +	for (i = 1; i < rfb->base.format->num_planes; ++i) { +		if (mode_cmd->handles[i] != mode_cmd->handles[0]) { +			drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. %u\n", +				    i, mode_cmd->handles[0], mode_cmd->handles[i]); +			ret = -EINVAL; +			goto fail; +		} +	} + +	ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface); +	if (ret) +		goto fail; + +	if (dev->mode_config.allow_fb_modifiers && +	    !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { +		ret = convert_tiling_flags_to_modifier(rfb); +		if (ret) { +			drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier", +				    rfb->tiling_flags); +			goto fail; +		} +	} + +	for (i = 1; i < rfb->base.format->num_planes; ++i) { +		rfb->base.obj[i] = rfb->base.obj[0]; +		drm_gem_object_get(rfb->base.obj[i]);  	} +  	return 0; + +fail: +	rfb->base.obj[0] = NULL; +	return ret;  }  struct drm_framebuffer * @@ -564,14 +932,14 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,  	obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);  	if (obj ==  NULL) { -		dev_err(&dev->pdev->dev, "No GEM object associated to handle 0x%08X, " -			"can't create framebuffer\n", mode_cmd->handles[0]); +		drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, " +			    "can't create framebuffer\n", mode_cmd->handles[0]);  		return ERR_PTR(-ENOENT);  	}  	/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */  	if (obj->import_attach) { -		DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); +		drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n");  		return ERR_PTR(-EINVAL);  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 3620b24785e1..dc7b7d116549 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,5 +44,7 @@ struct drm_framebuffer *  amdgpu_display_user_framebuffer_create(struct drm_device *dev,  				       struct drm_file *file_priv,  				       const struct drm_mode_fb_cmd2 *mode_cmd); +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 957934926b24..e42175e1acf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,42 +42,6 @@  #include <linux/pci-p2pdma.h>  /** - * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation - * @obj: GEM BO - * - * Sets up an in-kernel virtual mapping of the BO's memory. - * - * Returns: - * The virtual address of the mapping or an error pointer. - */ -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) -{ -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); -	int ret; - -	ret = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, -			  &bo->dma_buf_vmap); -	if (ret) -		return ERR_PTR(ret); - -	return bo->dma_buf_vmap.virtual; -} - -/** - * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation - * @obj: GEM BO - * @vaddr: Virtual address (unused) - * - * Tears down the in-kernel virtual mapping of the BO's memory. 
- */ -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - -	ttm_bo_kunmap(&bo->dma_buf_vmap); -} - -/**   * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation   * @obj: GEM BO   * @vma: Virtual memory area @@ -281,7 +245,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,  	struct sg_table *sgt;  	long r; -	if (!bo->pin_count) { +	if (!bo->tbo.pin_count) {  		/* move buffer into GTT or VRAM */  		struct ttm_operation_ctx ctx = { false, false };  		unsigned domains = AMDGPU_GEM_DOMAIN_GTT; @@ -390,7 +354,8 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,  	if (unlikely(ret != 0))  		return ret; -	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { +	if (!bo->tbo.pin_count && +	    (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {  		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);  		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	} @@ -459,6 +424,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)  	struct amdgpu_device *adev = drm_to_adev(dev);  	struct amdgpu_bo *bo;  	struct amdgpu_bo_param bp; +	struct drm_gem_object *gobj;  	int ret;  	memset(&bp, 0, sizeof(bp)); @@ -469,17 +435,20 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)  	bp.type = ttm_bo_type_sg;  	bp.resv = resv;  	dma_resv_lock(resv, NULL); -	ret = amdgpu_bo_create(adev, &bp, &bo); +	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, +			AMDGPU_GEM_DOMAIN_CPU, +			0, ttm_bo_type_sg, resv, &gobj);  	if (ret)  		goto error; +	bo = gem_to_amdgpu_bo(gobj);  	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;  	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;  	if (dma_buf->ops != &amdgpu_dmabuf_ops)  		bo->prime_shared_count = 1;  	dma_resv_unlock(resv); -	return &bo->tbo.base; +	return gobj;  error:  	dma_resv_unlock(resv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index 2c5c84a06bb9..39b5b9616fd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -31,8 +31,6 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,  					    struct dma_buf *dma_buf);  bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,  				      struct amdgpu_bo *bo); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);  int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,  			  struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 42d9748921f5..72efd579ec5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -40,6 +40,7 @@  #include "amdgpu.h"  #include "amdgpu_irq.h"  #include "amdgpu_dma_buf.h" +#include "amdgpu_sched.h"  #include "amdgpu_amdkfd.h" @@ -94,16 +95,16 @@  #define KMS_DRIVER_MINOR	40  #define KMS_DRIVER_PATCHLEVEL	0 -int amdgpu_vram_limit = 0; -int amdgpu_vis_vram_limit = 0; +int amdgpu_vram_limit; +int amdgpu_vis_vram_limit;  int amdgpu_gart_size = -1; /* auto */  int amdgpu_gtt_size = -1; /* auto */  int amdgpu_moverate = -1; /* auto */ -int amdgpu_benchmarking = 0; -int amdgpu_testing = 0; +int amdgpu_benchmarking; +int amdgpu_testing;  int amdgpu_audio = -1; -int amdgpu_disp_priority = 0; -int amdgpu_hw_i2c = 0; +int amdgpu_disp_priority; +int 
amdgpu_hw_i2c;  int amdgpu_pcie_gen2 = -1;  int amdgpu_msi = -1;  char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; @@ -113,19 +114,19 @@ int amdgpu_aspm = -1;  int amdgpu_runtime_pm = -1;  uint amdgpu_ip_block_mask = 0xffffffff;  int amdgpu_bapm = -1; -int amdgpu_deep_color = 0; +int amdgpu_deep_color;  int amdgpu_vm_size = -1;  int amdgpu_vm_fragment_size = -1;  int amdgpu_vm_block_size = -1; -int amdgpu_vm_fault_stop = 0; -int amdgpu_vm_debug = 0; +int amdgpu_vm_fault_stop; +int amdgpu_vm_debug;  int amdgpu_vm_update_mode = -1; -int amdgpu_exp_hw_support = 0; +int amdgpu_exp_hw_support;  int amdgpu_dc = -1;  int amdgpu_sched_jobs = 32;  int amdgpu_sched_hw_submission = 2; -uint amdgpu_pcie_gen_cap = 0; -uint amdgpu_pcie_lane_cap = 0; +uint amdgpu_pcie_gen_cap; +uint amdgpu_pcie_lane_cap;  uint amdgpu_cg_mask = 0xffffffff;  uint amdgpu_pg_mask = 0xffffffff;  uint amdgpu_sdma_phase_quantum = 32; @@ -133,23 +134,31 @@ char *amdgpu_disable_cu = NULL;  char *amdgpu_virtual_display = NULL;  /* OverDrive(bit 14) disabled by default*/  uint amdgpu_pp_feature_mask = 0xffffbfff; -uint amdgpu_force_long_training = 0; -int amdgpu_job_hang_limit = 0; +uint amdgpu_force_long_training; +int amdgpu_job_hang_limit;  int amdgpu_lbpw = -1;  int amdgpu_compute_multipipe = -1;  int amdgpu_gpu_recovery = -1; /* auto */ -int amdgpu_emu_mode = 0; -uint amdgpu_smu_memory_pool_size = 0; -/* FBC (bit 0) disabled by default*/ -uint amdgpu_dc_feature_mask = 0; -uint amdgpu_dc_debug_mask = 0; +int amdgpu_emu_mode; +uint amdgpu_smu_memory_pool_size; +/* + * FBC (bit 0) disabled by default + * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default + *   - With this, for multiple monitors in sync (e.g. with the same model), + *     mclk switching will be allowed. And the mclk will not be forced to the + *     highest. That helps save some idle power. + * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default + * PSR (bit 3) disabled by default + */ +uint amdgpu_dc_feature_mask = 2; +uint amdgpu_dc_debug_mask;  int amdgpu_async_gfx_ring = 1; -int amdgpu_mcbp = 0; +int amdgpu_mcbp;  int amdgpu_discovery = -1; -int amdgpu_mes = 0; +int amdgpu_mes;  int amdgpu_noretry = -1;  int amdgpu_force_asic_type = -1; -int amdgpu_tmz = 0; +int amdgpu_tmz;  int amdgpu_reset_method = -1; /* auto */  int amdgpu_num_kcq = -1; @@ -271,7 +280,7 @@ module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_  /**   * DOC: dpm (int)   * Override for dynamic power management setting - * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20) + * (0 = disable, 1 = enable)   * The default is -1 (auto).   */  MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); @@ -296,7 +305,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);   * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down   * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.   */ -MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");  module_param_named(runpm, amdgpu_runtime_pm, int, 0444);  /** @@ -764,7 +773,7 @@ module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);   * Defaults to 0, or disabled. Userspace can still override this level later   * after boot. 
*/ -uint amdgpu_dm_abm_level = 0; +uint amdgpu_dm_abm_level;  MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");  module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); @@ -782,7 +791,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);   * DOC: reset_method (int)   * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)   */ -MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");  module_param_named(reset_method, amdgpu_reset_method, int, 0444);  /** @@ -1055,10 +1064,10 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},  	{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},  	/* Arcturus */ -	{0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, -	{0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, -	{0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, -	{0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, +	{0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, +	{0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, +	{0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS},  	/* Navi10 */  	{0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},  	{0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1089,12 +1098,27 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, +	/* Van Gogh */ +	{0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + +	/* Navy_Flounder */ +	{0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, +	{0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, +	{0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, +	{0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + +	/* DIMGREY_CAVEFISH */ +	{0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, +	{0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, +	{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, +	{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, +  	{0, 0, 0}  };  MODULE_DEVICE_TABLE(pci, pciidlist); -static struct drm_driver kms_driver; +static const struct drm_driver amdgpu_kms_driver;  static int amdgpu_pci_probe(struct pci_dev *pdev,  			    const struct pci_device_id *ent) @@ -1165,7 +1189,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  	if (ret)  		return ret; -	adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), ddev); +	adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);  	if (IS_ERR(adev))  		return PTR_ERR(adev); @@ -1316,7 +1340,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  	}  	adev->in_runpm = true; -	if (amdgpu_device_supports_boco(drm_dev)) +	if (amdgpu_device_supports_atpx(drm_dev))  		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;  	drm_kms_helper_poll_disable(drm_dev); @@ -1324,13 +1348,11 @@ static int 
amdgpu_pmops_runtime_suspend(struct device *dev)  	if (ret)  		return ret; -	if (amdgpu_device_supports_boco(drm_dev)) { +	if (amdgpu_device_supports_atpx(drm_dev)) {  		/* Only need to handle PCI state in the driver for ATPX  		 * PCI core handles it for _PR3.  		 */ -		if (amdgpu_is_atpx_hybrid()) { -			pci_ignore_hotplug(pdev); -		} else { +		if (!amdgpu_is_atpx_hybrid()) {  			amdgpu_device_cache_pci_state(pdev);  			pci_disable_device(pdev);  			pci_ignore_hotplug(pdev); @@ -1354,28 +1376,31 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  	if (!adev->runpm)  		return -EINVAL; -	if (amdgpu_device_supports_boco(drm_dev)) { +	if (amdgpu_device_supports_atpx(drm_dev)) {  		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;  		/* Only need to handle PCI state in the driver for ATPX  		 * PCI core handles it for _PR3.  		 */ -		if (amdgpu_is_atpx_hybrid()) { -			pci_set_master(pdev); -		} else { +		if (!amdgpu_is_atpx_hybrid()) {  			pci_set_power_state(pdev, PCI_D0);  			amdgpu_device_load_pci_state(pdev);  			ret = pci_enable_device(pdev);  			if (ret)  				return ret; -			pci_set_master(pdev);  		} +		pci_set_master(pdev); +	} else if (amdgpu_device_supports_boco(drm_dev)) { +		/* Only need to handle PCI state in the driver for ATPX +		 * PCI core handles it for _PR3. +		 */ +		pci_set_master(pdev);  	} else if (amdgpu_device_supports_baco(drm_dev)) {  		amdgpu_device_baco_exit(drm_dev);  	}  	ret = amdgpu_device_resume(drm_dev, false);  	drm_kms_helper_poll_enable(drm_dev); -	if (amdgpu_device_supports_boco(drm_dev)) +	if (amdgpu_device_supports_atpx(drm_dev))  		drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;  	adev->in_runpm = false;  	return 0; @@ -1495,7 +1520,7 @@ static const struct file_operations amdgpu_driver_kms_fops = {  int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)  { -        struct drm_file *file; +	struct drm_file *file;  	if (!filp)  		return -EINVAL; @@ -1509,7 +1534,27 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)  	return 0;  } -static struct drm_driver kms_driver = { +const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), +	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	/* KMS */ +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW), +}; + +static const struct drm_driver amdgpu_kms_driver = {  	.driver_features =  	    DRIVER_ATOMIC |  	    DRIVER_GEM | @@ -1520,19 +1565,14 @@ static struct drm_driver kms_driver = {  	.lastclose = amdgpu_driver_lastclose_kms,  	.irq_handler = amdgpu_irq_handler,  	.ioctls = amdgpu_ioctls_kms, -	.gem_free_object_unlocked = amdgpu_gem_object_free, -	.gem_open_object = amdgpu_gem_object_open, -	.gem_close_object = amdgpu_gem_object_close, +	.num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms),  	.dumb_create = amdgpu_mode_dumb_create,  	.dumb_map_offset = amdgpu_mode_dumb_mmap,  	.fops = &amdgpu_driver_kms_fops,  	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle, -	.gem_prime_export = amdgpu_gem_prime_export,  	.gem_prime_import = amdgpu_gem_prime_import, -	.gem_prime_vmap = amdgpu_gem_prime_vmap, -	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,  	.gem_prime_mmap = amdgpu_gem_prime_mmap,  	.name = DRIVER_NAME, @@ -1578,7 +1618,6 @@ static int __init amdgpu_init(void)  		goto error_fence;  	DRM_INFO("amdgpu kernel modesetting enabled.\n"); -	kms_driver.num_ioctls = amdgpu_max_kms_ioctl;  	amdgpu_register_atpx_handler();  	/* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index e2c2eb45a793..0bf7d36c6686 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,  	int ret;  	unsigned long tmp; +	memset(&mode_cmd, 0, sizeof(mode_cmd));  	mode_cmd.width = sizes->surface_width;  	mode_cmd.height = sizes->surface_height; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fe2d495d08ab..d56f4023ebb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -130,6 +130,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)   *   * @ring: ring the fence is associated with   * @f: resulting fence object + * @flags: flags to pass into the subordinate .emit_fence() call   *   * Emits a fence command on the requested ring (all asics).   * Returns 0 on success, -ENOMEM on failure. @@ -187,6 +188,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,   *   * @ring: ring the fence is associated with   * @s: resulting sequence number + * @timeout: the timeout for waiting in usecs   *   * Emits a fence command on the requested ring (all asics).   * Used for fence polling. @@ -294,7 +296,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)  /**   * amdgpu_fence_fallback - fallback for hardware interrupts   * - * @work: delayed work item + * @t: timer context used to obtain the pointer to the ring structure   *   * Checks for fence activity.   */ @@ -310,7 +312,6 @@ static void amdgpu_fence_fallback(struct timer_list *t)  /**   * amdgpu_fence_wait_empty - wait for all fences to signal   * - * @adev: amdgpu device pointer   * @ring: ring index the fence is associated with   *   * Wait for all fences on the requested ring to signal (all asics). 
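The @t fix above follows the kernel timer convention: timer_setup() registers a callback that is handed only the struct timer_list, and the handler recovers its enclosing structure with from_timer(). A minimal sketch of the pattern, using hypothetical names (example_ring, fallback_timer) rather than the real amdgpu types:

#include <linux/timer.h>
#include <linux/jiffies.h>

struct example_ring {
	struct timer_list fallback_timer;
};

/* timer_setup() callbacks receive the timer itself, not a device pointer */
static void example_fence_fallback(struct timer_list *t)
{
	struct example_ring *ring = from_timer(ring, t, fallback_timer);

	/* check for fence activity on the ring here, then re-arm */
	mod_timer(&ring->fallback_timer, jiffies + HZ / 2);
}

static void example_ring_init(struct example_ring *ring)
{
	timer_setup(&ring->fallback_timer, example_fence_fallback, 0);
}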
@@ -639,7 +640,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)  /**   * amdgpu_fence_enable_signaling - enable signalling on fence - * @fence: fence + * @f: fence   *   * This function is called with fence_queue lock held, and adds a callback   * to fence_queue that checks if this fence is signaled, and if so it @@ -675,7 +676,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu)  /**   * amdgpu_fence_release - callback that fence can be freed   * - * @fence: fence + * @f: fence   *   * This function is called when the reference count becomes zero.   * It just RCU schedules freeing up the fence. @@ -740,7 +741,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)  	return 0;  } -/** +/*   * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover   *   * Manually trigger a gpu reset at the next fence wait. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c new file mode 100644 index 000000000000..7c6e02e35573 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -0,0 +1,141 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/debugfs.h> +#include <linux/firmware.h> +#include <linux/dma-mapping.h> + +#include "amdgpu.h" +#include "amdgpu_fw_attestation.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" + +#define FW_ATTESTATION_DB_COOKIE        0x143b6a37 +#define FW_ATTESTATION_RECORD_VALID	1 +#define FW_ATTESTATION_MAX_SIZE		4096 + +typedef struct FW_ATT_DB_HEADER +{ +	uint32_t AttDbVersion;           /* version of the FW attestation record feature */ +	uint32_t AttDbCookie;            /* cookie as an extra check for corrupt data */ +} FW_ATT_DB_HEADER; + +typedef struct FW_ATT_RECORD +{ +	uint16_t AttFwIdV1;              /* Legacy FW Type field */ +	uint16_t AttFwIdV2;              /* V2 FW ID field */ +	uint32_t AttFWVersion;           /* FW Version */ +	uint16_t AttFWActiveFunctionID;  /* The VF ID (only in VF Attestation Table) */ +	uint16_t AttSource;              /* FW source indicator */ +	uint16_t RecordValid;            /* Indicates whether the record is a valid entry */ +	uint8_t  AttFwTaId;              /* Ta ID (only in TA Attestation Table) */ +	uint8_t  Reserved; +} FW_ATT_RECORD; + +static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, +						  char __user *buf, +						  size_t size, +						  loff_t *pos) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; +	uint64_t records_addr = 0; +	uint64_t vram_pos = 0; +	FW_ATT_DB_HEADER fw_att_hdr = {0}; +	FW_ATT_RECORD fw_att_record = {0}; + +	if (size < sizeof(FW_ATT_RECORD)) { +		DRM_WARN("FW attestation input buffer too small"); +		return -EINVAL; +	} + +	if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) { +		DRM_WARN("FW attestation out of bounds"); +		return 0; +	} + +	if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) { +		DRM_WARN("Failed to get FW attestation record address"); +		return -EINVAL; +	} + +	vram_pos = records_addr - adev->gmc.vram_start; + +	if (*pos == 0) { +		amdgpu_device_vram_access(adev, +					  vram_pos, +					  (uint32_t *)&fw_att_hdr, +					  sizeof(FW_ATT_DB_HEADER), +					  false); + +		if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) { +			DRM_WARN("Invalid FW attestation cookie"); +			return -EINVAL; +		} + +		DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion); +	} + +	amdgpu_device_vram_access(adev, +				  vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos, +				  (uint32_t *)&fw_att_record, +				  sizeof(FW_ATT_RECORD), +				  false); + +	if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID) +		return 0; + +	if (copy_to_user(buf, (void *)&fw_att_record, sizeof(FW_ATT_RECORD))) +		return -EINVAL; + +	*pos += sizeof(FW_ATT_RECORD); + +	return sizeof(FW_ATT_RECORD); +} + +static const struct file_operations amdgpu_fw_attestation_debugfs_ops = { +	.owner = THIS_MODULE, +	.read = amdgpu_fw_attestation_debugfs_read, +	.write = NULL, +	.llseek = default_llseek +}; + +static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) +{ +	if (adev->asic_type >= CHIP_SIENNA_CICHLID) +		return 1; + +	return 0; +} + +void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev) +{ +	if (!amdgpu_is_fw_attestation_supported(adev)) +		return; + +	debugfs_create_file("amdgpu_fw_attestation", +			    S_IRUSR, +			    adev_to_drm(adev)->primary->debugfs_root, +			    adev, +			    &amdgpu_fw_attestation_debugfs_ops); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h new file mode 100644 index 
000000000000..90af4fe58c99 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#ifndef _AMDGPU_FW_ATTESTATION_H +#define _AMDGPU_FW_ATTESTATION_H + +#include "amdgpu.h" + +void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index e01e681d2a60..0db933026722 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -75,9 +75,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)  	if (adev->dummy_page_addr)  		return 0; -	adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0, +	adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0,  					     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); -	if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) { +	if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) {  		dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");  		adev->dummy_page_addr = 0;  		return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7e8265da9f25..d0a1fee1f5f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -33,12 +33,16 @@  #include <drm/amdgpu_drm.h>  #include <drm/drm_debugfs.h> +#include <drm/drm_gem_ttm_helper.h>  #include "amdgpu.h"  #include "amdgpu_display.h" +#include "amdgpu_dma_buf.h"  #include "amdgpu_xgmi.h" -void amdgpu_gem_object_free(struct drm_gem_object *gobj) +static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; + +static void amdgpu_gem_object_free(struct drm_gem_object *gobj)  {  	struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); @@ -66,27 +70,14 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,  	bp.type = type;  	bp.resv = resv;  	bp.preferred_domain = initial_domain; -retry:  	bp.flags = flags;  	bp.domain = initial_domain;  	r = amdgpu_bo_create(adev, &bp, &bo); -	if (r) { -		if (r != -ERESTARTSYS) { -			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { -				flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; -				goto retry; -			} - -			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { -				initial_domain |= AMDGPU_GEM_DOMAIN_GTT; -				goto retry; -			} -			DRM_DEBUG("Failed to allocate GEM object (%ld, 
%d, %u, %d)\n", -				  size, initial_domain, alignment, r); -		} +	if (r)  		return r; -	} +  	*obj = &bo->tbo.base; +	(*obj)->funcs = &amdgpu_gem_object_funcs;  	return 0;  } @@ -119,8 +110,8 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev)   * Call from drm_gem_handle_create which appear in both new and open ioctl   * case.   */ -int amdgpu_gem_object_open(struct drm_gem_object *obj, -			   struct drm_file *file_priv) +static int amdgpu_gem_object_open(struct drm_gem_object *obj, +				  struct drm_file *file_priv)  {  	struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj);  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -152,8 +143,8 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,  	return 0;  } -void amdgpu_gem_object_close(struct drm_gem_object *obj, -			     struct drm_file *file_priv) +static void amdgpu_gem_object_close(struct drm_gem_object *obj, +				    struct drm_file *file_priv)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -211,6 +202,15 @@ out_unlock:  	ttm_eu_backoff_reservation(&ticket, &list);  } +static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { +	.free = amdgpu_gem_object_free, +	.open = amdgpu_gem_object_open, +	.close = amdgpu_gem_object_close, +	.export = amdgpu_gem_prime_export, +	.vmap = drm_gem_ttm_vmap, +	.vunmap = drm_gem_ttm_vunmap, +}; +  /*   * GEM ioctls.   */ @@ -225,7 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  	uint64_t size = args->in.bo_size;  	struct dma_resv *resv = NULL;  	struct drm_gem_object *gobj; -	uint32_t handle; +	uint32_t handle, initial_domain;  	int r;  	/* reject invalid gem flags */ @@ -269,9 +269,28 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  		resv = vm->root.base.bo->tbo.base.resv;  	} +retry: +	initial_domain = (u32)(0xffffffff & args->in.domains);  	r = amdgpu_gem_object_create(adev, size, args->in.alignment, -				     (u32)(0xffffffff & args->in.domains), +				     initial_domain,  				     flags, ttm_bo_type_device, resv, &gobj); +	if (r) { +		if (r != -ERESTARTSYS) { +			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { +				flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; +				goto retry; +			} + +			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { +				initial_domain |= AMDGPU_GEM_DOMAIN_GTT; +				goto retry; +			} +			DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", +				  size, initial_domain, args->in.alignment, r); +		} +		return r; +	} +  	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {  		if (!r) {  			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -836,67 +855,6 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,  }  #if defined(CONFIG_DEBUG_FS) - -#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)	\ -	if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) {	\ -		seq_printf((m), " " #flag);		\ -	} - -static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) -{ -	struct drm_gem_object *gobj = ptr; -	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); -	struct seq_file *m = data; - -	struct dma_buf_attachment *attachment; -	struct dma_buf *dma_buf; -	unsigned domain; -	const char *placement; -	unsigned pin_count; - -	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); -	switch (domain) { -	case AMDGPU_GEM_DOMAIN_VRAM: -		placement = "VRAM"; -		break; -	case AMDGPU_GEM_DOMAIN_GTT: -		placement = " GTT"; -		break; -	case AMDGPU_GEM_DOMAIN_CPU: -	default: -		placement = " CPU"; -		break; -	} -	seq_printf(m, "\t0x%08x: 
%12ld byte %s", -		   id, amdgpu_bo_size(bo), placement); - -	pin_count = READ_ONCE(bo->pin_count); -	if (pin_count) -		seq_printf(m, " pin count %d", pin_count); - -	dma_buf = READ_ONCE(bo->tbo.base.dma_buf); -	attachment = READ_ONCE(bo->tbo.base.import_attach); - -	if (attachment) -		seq_printf(m, " imported from %p%s", dma_buf, -			   attachment->peer2peer ? " P2P" : ""); -	else if (dma_buf) -		seq_printf(m, " exported as %p", dma_buf); - -	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); -	amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); - -	seq_printf(m, "\n"); - -	return 0; -} -  static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)  {  	struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -910,6 +868,8 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)  	list_for_each_entry(file, &dev->filelist, lhead) {  		struct task_struct *task; +		struct drm_gem_object *gobj; +		int id;  		/*  		 * Although we have a valid reference on file->pid, that does @@ -924,7 +884,11 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)  		rcu_read_unlock();  		spin_lock(&file->table_lock); -		idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m); +		idr_for_each_entry(&file->object_idr, gobj, id) { +			struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + +			amdgpu_bo_print_info(id, bo, m); +		}  		spin_unlock(&file->table_lock);  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index e0f025dd1b14..637bf51dbf06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -33,11 +33,6 @@  #define AMDGPU_GEM_DOMAIN_MAX		0x3  #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) -void amdgpu_gem_object_free(struct drm_gem_object *obj); -int amdgpu_gem_object_open(struct drm_gem_object *obj, -				struct drm_file *file_priv); -void amdgpu_gem_object_close(struct drm_gem_object *obj, -				struct drm_file *file_priv);  unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);  /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8c9bacfdbc30..cd2c676a2797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -193,10 +193,14 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)  }  bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, -					       int queue) +					       int pipe, int queue)  { -	/* Policy: make queue 0 of each pipe as high priority compute queue */ -	return (queue == 0); +	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); +	int cond; +	/* Policy: alternate between normal and high priority */ +	cond = multipipe_policy ? 
pipe : queue; + +	return ((cond % 2) != 0);  } @@ -804,3 +808,34 @@ failed_undo:  failed_kiq_write:  	dev_err(adev->dev, "failed to write reg:%x\n", reg);  } + +int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) +{ +	if (amdgpu_num_kcq == -1) { +		return 8; +	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { +		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); +		return 8; +	} +	return amdgpu_num_kcq; +} + +/* amdgpu_gfx_state_change_set - Handle gfx power state change set + * @adev: amdgpu_device pointer + * @state: gfx power state (1 = sGpuChangeState_D0Entry, 2 = sGpuChangeState_D3Entry) + * + */ + +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state) +{ +	if (is_support_sw_smu(adev)) { +		smu_gfx_state_change_set(&adev->smu, state); +	} else { +		mutex_lock(&adev->pm.mutex); +		if (adev->powerplay.pp_funcs && +		    adev->powerplay.pp_funcs->gfx_state_change_set) +			((adev)->powerplay.pp_funcs->gfx_state_change_set( +				(adev)->powerplay.pp_handle, state)); +		mutex_unlock(&adev->pm.mutex); +	} +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 258498cbf1eb..6b5a8f4642cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -47,6 +47,12 @@ enum gfx_pipe_priority {  	AMDGPU_GFX_PIPE_PRIO_MAX  }; +/* Argument for PPSMC_MSG_GpuChangeState */ +enum gfx_change_state { +	sGpuChangeState_D0Entry = 1, +	sGpuChangeState_D3Entry, +}; +  #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0  #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15 @@ -218,6 +224,7 @@ struct amdgpu_gfx_funcs {  	void (*reset_ras_error_count) (struct amdgpu_device *adev);  	void (*init_spm_golden)(struct amdgpu_device *adev);  	void (*query_ras_error_status) (struct amdgpu_device *adev); +	void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);  };  struct sq_work { @@ -373,7 +380,7 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,  bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,  				     int pipe, int queue);  bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, -					       int queue); +					       int pipe, int queue);  int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,  			       int pipe, int queue);  void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, @@ -392,4 +399,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,  				  struct amdgpu_iv_entry *entry);  uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);  void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 36604d751d62..6e679db5e46f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -45,12 +45,10 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,  			       uint64_t *addr, uint64_t *flags)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	switch (bo->tbo.mem.mem_type) {  	case TTM_PL_TT: -		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); -		*addr = ttm->dma_address[0]; +		*addr = bo->tbo.ttm->dma_address[0];  		break;  	case 
TTM_PL_VRAM:  		*addr = amdgpu_bo_gpu_offset(bo); @@ -63,9 +61,8 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,  	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);  } -/** +/*   * amdgpu_gmc_pd_addr - return the address of the root directory - *   */  uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)  { @@ -114,7 +111,7 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,  /**   * amdgpu_gmc_agp_addr - return the address in the AGP address space   * - * @tbo: TTM BO which needs the address, must be in GTT domain + * @bo: TTM BO which needs the address, must be in GTT domain   *   * Tries to figure out how to access the BO through the AGP aperture. Returns   * AMDGPU_BO_INVALID_OFFSET if that is not possible. @@ -122,16 +119,14 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,  uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); -	struct ttm_dma_tt *ttm; -	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached) +	if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)  		return AMDGPU_BO_INVALID_OFFSET; -	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm); -	if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) +	if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)  		return AMDGPU_BO_INVALID_OFFSET; -	return adev->gmc.agp_start + ttm->dma_address[0]; +	return adev->gmc.agp_start + bo->ttm->dma_address[0];  }  /** @@ -393,6 +388,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)  	case CHIP_NAVI10:  	case CHIP_NAVI14:  	case CHIP_NAVI12: +	case CHIP_VANGOGH:  		/* Don't enable it by default yet.  		 */  		if (amdgpu_tmz < 1) { @@ -425,20 +421,26 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)  	struct amdgpu_gmc *gmc = &adev->gmc;  	switch (adev->asic_type) { -	case CHIP_RAVEN: -		/* Raven currently has issues with noretry -		 * regardless of what we decide for other -		 * asics, we should leave raven with -		 * noretry = 0 until we root cause the -		 * issues. +	case CHIP_VEGA10: +	case CHIP_VEGA20: +		/* +		 * noretry = 0 will cause kfd page fault tests fail +		 * for some ASICs, so set default to 1 for these ASICs.  		 */  		if (amdgpu_noretry == -1) -			gmc->noretry = 0; +			gmc->noretry = 1;  		else  			gmc->noretry = amdgpu_noretry;  		break; +	case CHIP_RAVEN:  	default: -		/* default this to 0 for now, but we may want +		/* Raven currently has issues with noretry +		 * regardless of what we decide for other +		 * asics, we should leave raven with +		 * noretry = 0 until we root cause the +		 * issues. +		 * +		 * default this to 0 for now, but we may want  		 * to change this in the future for certain  		 * GPUs as it can increase performance in  		 * certain cases. 
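The noretry hunk above is the usual tri-state module-parameter idiom: -1 means auto and resolves to a per-ASIC default (1 for Vega10/Vega20, 0 everywhere else), while any other value is taken as the user's explicit choice. A minimal sketch of that resolution, with a hypothetical helper name (resolve_noretry), not actual amdgpu code:

/* -1 = auto (use the per-ASIC default); 0 or 1 = explicit user choice */
static int resolve_noretry(int param, bool asic_defaults_to_noretry)
{
	if (param != -1)
		return param;

	return asic_defaults_to_noretry ? 1 : 0;
}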
@@ -494,11 +496,16 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)  		break;  	} -	if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) +	if (amdgpu_sriov_vf(adev) || +	    !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {  		size = 0; -	else +	} else {  		size = amdgpu_gmc_get_vbios_fb_size(adev); +		if (adev->mman.keep_stolen_vga_memory) +			size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); +	} +  	/* set to 0 if the pre-OS buffer uses up most of vram */  	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))  		size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index f203e4a6a3f2..8980329cded0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -81,8 +81,8 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;  /**   * amdgpu_gtt_mgr_init - init GTT manager and DRM MM   * - * @man: TTM memory type manager - * @p_size: maximum size of GTT + * @adev: amdgpu_device pointer + * @gtt_size: maximum size of GTT   *   * Allocate and initialize the GTT manager.   */ @@ -123,7 +123,7 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)  /**   * amdgpu_gtt_mgr_fini - free and destroy GTT manager   * - * @man: TTM memory type manager + * @adev: amdgpu_device pointer   *   * Destroy and free the GTT manager, returns -EBUSY if ranges are still   * allocated inside it. @@ -136,7 +136,7 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)  	ttm_resource_manager_set_used(man, false); -	ret = ttm_resource_manager_force_list_clean(&adev->mman.bdev, man); +	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);  	if (ret)  		return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2f53fa0ae9a6..024d0a563a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -52,8 +52,10 @@  /**   * amdgpu_ib_get - request an IB (Indirect Buffer)   * - * @ring: ring index the IB is associated with + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm pointer   * @size: requested IB size + * @pool_type: IB pool type (delayed, immediate, direct)   * @ib: IB object returned   *   * Request an IB (all asics).  IBs are allocated using the @@ -101,9 +103,10 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,  /**   * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring   * - * @adev: amdgpu_device pointer + * @ring: ring index the IB is associated with   * @num_ibs: number of IBs to schedule   * @ibs: IB objects to schedule + * @job: job to schedule   * @f: fence created during this submission   *   * Schedule an IB on the associated ring (all asics). 
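A fair share of the hunks in this section, including the amdgpu_ib.c one just above, are kernel-doc repairs: every function parameter needs a matching @name line, and comments that are not kernel-doc must not open with the double-star marker. A minimal sketch of a well-formed block, with hypothetical names (example_ib_schedule, example_ring), not actual amdgpu code:

#include <linux/errno.h>

struct example_ring;

/**
 * example_ib_schedule - schedule an example IB on a ring
 * @ring: ring the IB is scheduled on
 * @num_ibs: number of IBs to schedule
 * @timeout: how long to wait for completion, in usecs
 *
 * Documenting each parameter keeps scripts/kernel-doc from warning, which
 * is what the @job and @pool_type additions above fix.
 *
 * Returns:
 * 0 on success, negative error code on failure.
 */
static int example_ib_schedule(struct example_ring *ring,
			       unsigned int num_ibs, long timeout)
{
	return num_ibs ? 0 : -EINVAL;
}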
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6e9a9e5dbea0..94b069630db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -208,7 +208,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,  	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))  		return amdgpu_sync_fence(sync, ring->vmid_wait); -	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); +	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);  	if (!fences)  		return -ENOMEM; @@ -259,6 +259,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,   * @sync: sync object where we add dependencies   * @fence: fence protecting ID from reuse   * @job: job who wants to use the VMID + * @id: resulting VMID   *   * Try to assign a reserved VMID. @@ -514,6 +515,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,   * amdgpu_vmid_reset - reset VMID to zero   *   * @adev: amdgpu device structure + * @vmhub: vmhub type   * @vmid: vmid number to use   *   * Reset saved GDS, GWS and OA to force switch on next flush. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 111a301ce878..dcd9b4a8e20b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -132,6 +132,35 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)  }  /** + * amdgpu_ih_ring_write - write IV to the ring buffer + * + * @ih: ih ring to write to + * @iv: the iv to write + * @num_dw: size of the iv in dw + * + * Writes an IV to the ring buffer using the CPU and increments the wptr. + * Used for testing and delegating IVs to a software ring. + */ +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, +			  unsigned int num_dw) +{ +	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; +	unsigned int i; + +	for (i = 0; i < num_dw; ++i) +		ih->ring[wptr++] = cpu_to_le32(iv[i]); + +	wptr <<= 2; +	wptr &= ih->ptr_mask; + +	/* Only commit the new wptr if we don't overflow */ +	if (wptr != READ_ONCE(ih->rptr)) { +		wmb(); +		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); +	} +} + +/**   * amdgpu_ih_process - interrupt handler   *   * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 4e0bb645176d..3c9cfe7eecff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,8 @@ struct amdgpu_ih_funcs {  int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,  			unsigned ring_size, bool use_bus_addr);  void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, +			  unsigned int num_dw);  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 300ac73b4738..bea57e8e793f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -207,6 +207,21 @@ static void amdgpu_irq_handle_ih2(struct work_struct *work)  }  /** + * amdgpu_irq_handle_ih_soft - kick off processing for ih_soft + * + * @work: work structure in struct amdgpu_irq + * + * Kick off processing of the IH soft ring. 
+ */ +static void amdgpu_irq_handle_ih_soft(struct work_struct *work) +{ +	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, +						  irq.ih_soft_work); + +	amdgpu_ih_process(adev, &adev->irq.ih_soft); +} + +/**   * amdgpu_msi_ok - check whether MSI functionality is enabled   *   * @adev: amdgpu device pointer (unused) @@ -281,6 +296,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  	INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);  	INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); +	INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft);  	adev->irq.installed = true;  	/* Use vector 0 for MSI-X */ @@ -413,6 +429,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,  	bool handled = false;  	int r; +	entry.ih = ih;  	entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];  	amdgpu_ih_decode_iv(adev, &entry); @@ -451,6 +468,24 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,  }  /** + * amdgpu_irq_delegate - delegate IV to soft IH ring + * + * @adev: amdgpu device pointer + * @entry: IV entry + * @num_dw: size of IV + * + * Delegate the IV to the soft IH ring and schedule processing of it. Used + * if the hardware delegation to IH1 or IH2 doesn't work for some reason. + */ +void amdgpu_irq_delegate(struct amdgpu_device *adev, +			 struct amdgpu_iv_entry *entry, +			 unsigned int num_dw) +{ +	amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw); +	schedule_work(&adev->irq.ih_soft_work); +} + +/**   * amdgpu_irq_update - update hardware interrupt state   *   * @adev: amdgpu device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index c718e94a55c9..ac527e5deae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -44,6 +44,7 @@ enum amdgpu_interrupt_state {  };  struct amdgpu_iv_entry { +	struct amdgpu_ih_ring *ih;  	unsigned client_id;  	unsigned src_id;  	unsigned ring_id; @@ -88,9 +89,9 @@ struct amdgpu_irq {  	bool				msi_enabled; /* msi enabled */  	/* interrupt rings */ -	struct amdgpu_ih_ring		ih, ih1, ih2; +	struct amdgpu_ih_ring		ih, ih1, ih2, ih_soft;  	const struct amdgpu_ih_funcs    *ih_funcs; -	struct work_struct		ih1_work, ih2_work; +	struct work_struct		ih1_work, ih2_work, ih_soft_work;  	struct amdgpu_irq_src		self_irq;  	/* gen irq stuff */ @@ -109,6 +110,9 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,  		      struct amdgpu_irq_src *source);  void amdgpu_irq_dispatch(struct amdgpu_device *adev,  			 struct amdgpu_ih_ring *ih); +void amdgpu_irq_delegate(struct amdgpu_device *adev, +			 struct amdgpu_iv_entry *entry, +			 unsigned int num_dw);  int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  		      unsigned type);  int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index efda38349a03..b16b32797624 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -29,7 +29,6 @@  #include "amdgpu.h"  #include <drm/drm_debugfs.h>  #include <drm/amdgpu_drm.h> -#include "amdgpu_sched.h"  #include "amdgpu_uvd.h"  #include "amdgpu_vce.h"  #include "atom.h" @@ -134,6 +133,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)  int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)  {  	struct drm_device *dev; +	struct pci_dev *parent;  	int r, acpi_status;  	dev = adev_to_drm(adev); @@ -145,6 
+145,9 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)  	    !pci_is_thunderbolt_attached(dev->pdev))  		flags |= AMD_IS_PX; +	parent = pci_upstream_bridge(adev->pdev); +	adev->has_pr3 = parent ? pci_pr3_present(parent) : false; +  	/* amdgpu_device_init should report only fatal error  	 * like memory allocation failure or iomapping failure,  	 * or memory manager initialization failure, it must @@ -157,16 +160,17 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)  		goto out;  	} -	if (amdgpu_device_supports_boco(dev) && -	    (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ +	if (amdgpu_device_supports_atpx(dev) && +	    (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ +		adev->runpm = true; +		dev_info(adev->dev, "Using ATPX for runtime pm\n"); +	} else if (amdgpu_device_supports_boco(dev) && +		   (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */  		adev->runpm = true; +		dev_info(adev->dev, "Using BOCO for runtime pm\n");  	} else if (amdgpu_device_supports_baco(dev) &&  		   (amdgpu_runtime_pm != 0)) {  		switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_CIK -		case CHIP_BONAIRE: -		case CHIP_HAWAII: -#endif  		case CHIP_VEGA20:  		case CHIP_ARCTURUS:  		case CHIP_SIENNA_CICHLID: @@ -181,10 +185,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)  				adev->runpm = true;  			break;  		default: -			/* enable runpm on VI+ */ +			/* enable runpm on CI+ */  			adev->runpm = true;  			break;  		} +		if (adev->runpm) +			dev_info(adev->dev, "Using BACO for runtime pm\n");  	}  	/* Call ACPI methods: require modeset init @@ -197,7 +203,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)  	if (adev->runpm) {  		/* only need to skip on ATPX */ -		if (amdgpu_device_supports_boco(dev) && +		if (amdgpu_device_supports_atpx(dev) &&  		    !amdgpu_is_atpx_hybrid())  			dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);  		pm_runtime_use_autosuspend(dev->dev); @@ -325,6 +331,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,  		fw_info->ver = adev->dm.dmcub_fw_version;  		fw_info->feature = 0;  		break; +	case AMDGPU_INFO_FW_TOC: +		fw_info->ver = adev->psp.toc_fw_version; +		fw_info->feature = adev->psp.toc_feature_version; +		break;  	default:  		return -EINVAL;  	} @@ -471,7 +481,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,  /**   * amdgpu_info_ioctl - answer a device specific request.   * - * @adev: amdgpu device pointer + * @dev: drm device pointer   * @data: request object   * @filp: drm filp   * @@ -480,7 +490,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,   * etc. (all asics).   * Returns 0 on success, -EINVAL on failure.   */ -static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  {  	struct amdgpu_device *adev = drm_to_adev(dev);  	struct drm_amdgpu_info *info = data; @@ -717,38 +727,42 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		return n ? 
-EFAULT : 0;  	}  	case AMDGPU_INFO_DEV_INFO: { -		struct drm_amdgpu_info_device dev_info; +		struct drm_amdgpu_info_device *dev_info;  		uint64_t vm_size; +		int ret; + +		dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); +		if (!dev_info) +			return -ENOMEM; -		memset(&dev_info, 0, sizeof(dev_info)); -		dev_info.device_id = dev->pdev->device; -		dev_info.chip_rev = adev->rev_id; -		dev_info.external_rev = adev->external_rev_id; -		dev_info.pci_rev = dev->pdev->revision; -		dev_info.family = adev->family; -		dev_info.num_shader_engines = adev->gfx.config.max_shader_engines; -		dev_info.num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; +		dev_info->device_id = dev->pdev->device; +		dev_info->chip_rev = adev->rev_id; +		dev_info->external_rev = adev->external_rev_id; +		dev_info->pci_rev = dev->pdev->revision; +		dev_info->family = adev->family; +		dev_info->num_shader_engines = adev->gfx.config.max_shader_engines; +		dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;  		/* return all clocks in KHz */ -		dev_info.gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; +		dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;  		if (adev->pm.dpm_enabled) { -			dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; -			dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; +			dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; +			dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;  		} else { -			dev_info.max_engine_clock = adev->clock.default_sclk * 10; -			dev_info.max_memory_clock = adev->clock.default_mclk * 10; +			dev_info->max_engine_clock = adev->clock.default_sclk * 10; +			dev_info->max_memory_clock = adev->clock.default_mclk * 10;  		} -		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; -		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * +		dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; +		dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se *  			adev->gfx.config.max_shader_engines; -		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; -		dev_info._pad = 0; -		dev_info.ids_flags = 0; +		dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; +		dev_info->_pad = 0; +		dev_info->ids_flags = 0;  		if (adev->flags & AMD_IS_APU) -			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; +			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;  		if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) -			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; +			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;  		if (amdgpu_is_tmz(adev)) -			dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ; +			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;  		vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;  		vm_size -= AMDGPU_VA_RESERVED_SIZE; @@ -758,45 +772,47 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		    adev->vce.fw_version < AMDGPU_VCE_FW_53_45)  			vm_size = min(vm_size, 1ULL << 40); -		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; -		dev_info.virtual_address_max = +		dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; +		dev_info->virtual_address_max =  			min(vm_size, AMDGPU_GMC_HOLE_START);  		if (vm_size > AMDGPU_GMC_HOLE_START) { -			dev_info.high_va_offset = AMDGPU_GMC_HOLE_END; -			dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size; +			dev_info->high_va_offset = AMDGPU_GMC_HOLE_END; +			dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;  		} -		
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); -		dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; -		dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; -		dev_info.cu_active_number = adev->gfx.cu_info.number; -		dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; -		dev_info.ce_ram_size = adev->gfx.ce_ram_size; -		memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], +		dev_info->virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); +		dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; +		dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE; +		dev_info->cu_active_number = adev->gfx.cu_info.number; +		dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; +		dev_info->ce_ram_size = adev->gfx.ce_ram_size; +		memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0],  		       sizeof(adev->gfx.cu_info.ao_cu_bitmap)); -		memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], +		memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],  		       sizeof(adev->gfx.cu_info.bitmap)); -		dev_info.vram_type = adev->gmc.vram_type; -		dev_info.vram_bit_width = adev->gmc.vram_width; -		dev_info.vce_harvest_config = adev->vce.harvest_config; -		dev_info.gc_double_offchip_lds_buf = +		dev_info->vram_type = adev->gmc.vram_type; +		dev_info->vram_bit_width = adev->gmc.vram_width; +		dev_info->vce_harvest_config = adev->vce.harvest_config; +		dev_info->gc_double_offchip_lds_buf =  			adev->gfx.config.double_offchip_lds_buf; -		dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; -		dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; -		dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; -		dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; -		dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; -		dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; -		dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; +		dev_info->wave_front_size = adev->gfx.cu_info.wave_front_size; +		dev_info->num_shader_visible_vgprs = adev->gfx.config.max_gprs; +		dev_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; +		dev_info->num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; +		dev_info->gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; +		dev_info->gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; +		dev_info->max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;  		if (adev->family >= AMDGPU_FAMILY_NV) -			dev_info.pa_sc_tile_steering_override = +			dev_info->pa_sc_tile_steering_override =  				adev->gfx.config.pa_sc_tile_steering_override; -		dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; +		dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; -		return copy_to_user(out, &dev_info, -				    min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; +		ret = copy_to_user(out, dev_info, +				   min((size_t)size, sizeof(*dev_info))) ? 
-EFAULT : 0; +		kfree(dev_info); +		return ret;  	}  	case AMDGPU_INFO_VCE_CLOCK_TABLE: {  		unsigned i; @@ -1243,27 +1259,6 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc)  	amdgpu_irq_put(adev, &adev->crtc_irq, idx);  } -const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), -	DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	/* KMS */ -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), -	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW) -}; -const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); -  /*   * Debugfs info   */ @@ -1466,6 +1461,13 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)  	seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n",  		   fw_info.feature, fw_info.ver); +	/* TOC */ +	query_fw.fw_type = AMDGPU_INFO_FW_TOC; +	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); +	if (ret) +		return ret; +	seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n", +		   fw_info.feature, fw_info.ver);  	seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a04decb934b0..319cb19e1b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -302,6 +302,9 @@ struct amdgpu_display_funcs {  struct amdgpu_framebuffer {  	struct drm_framebuffer base; +	uint64_t tiling_flags; +	bool tmz_surface; +  	/* caching for later use */  	uint64_t address;  }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index edaac242ff85..e62cc0e1a5ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -53,6 +53,8 @@ struct amdgpu_nbio_funcs {  	u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);  	u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);  	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); +	u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev); +	u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev);  	u32 (*get_rev_id)(struct amdgpu_device *adev);  	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);  	void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -85,6 +87,8 @@ struct amdgpu_nbio_funcs 
{  	void (*query_ras_error_count)(struct amdgpu_device *adev,  					void *ras_error_status);  	int (*ras_late_init)(struct amdgpu_device *adev); +	void (*enable_aspm)(struct amdgpu_device *adev, +			    bool enable);  };  struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ac043baac05d..25ec4d57333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -78,7 +78,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)  	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);  	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); -	if (bo->pin_count > 0) +	if (bo->tbo.pin_count > 0)  		amdgpu_bo_subtract_pin_size(bo);  	amdgpu_bo_kunmap(bo); @@ -137,7 +137,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].fpfn = 0;  		places[c].lpfn = 0;  		places[c].mem_type = TTM_PL_VRAM; -		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; +		places[c].flags = 0;  		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)  			places[c].lpfn = visible_pfn; @@ -154,11 +154,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].lpfn = 0;  		places[c].mem_type = TTM_PL_TT;  		places[c].flags = 0; -		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) -			places[c].flags |= TTM_PL_FLAG_WC | -				TTM_PL_FLAG_UNCACHED; -		else -			places[c].flags |= TTM_PL_FLAG_CACHED;  		c++;  	} @@ -167,11 +162,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].lpfn = 0;  		places[c].mem_type = TTM_PL_SYSTEM;  		places[c].flags = 0; -		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) -			places[c].flags |= TTM_PL_FLAG_WC | -				TTM_PL_FLAG_UNCACHED; -		else -			places[c].flags |= TTM_PL_FLAG_CACHED;  		c++;  	} @@ -179,7 +169,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].fpfn = 0;  		places[c].lpfn = 0;  		places[c].mem_type = AMDGPU_PL_GDS; -		places[c].flags = TTM_PL_FLAG_UNCACHED; +		places[c].flags = 0;  		c++;  	} @@ -187,7 +177,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].fpfn = 0;  		places[c].lpfn = 0;  		places[c].mem_type = AMDGPU_PL_GWS; -		places[c].flags = TTM_PL_FLAG_UNCACHED; +		places[c].flags = 0;  		c++;  	} @@ -195,7 +185,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].fpfn = 0;  		places[c].lpfn = 0;  		places[c].mem_type = AMDGPU_PL_OA; -		places[c].flags = TTM_PL_FLAG_UNCACHED; +		places[c].flags = 0;  		c++;  	} @@ -203,7 +193,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		places[c].fpfn = 0;  		places[c].lpfn = 0;  		places[c].mem_type = TTM_PL_SYSTEM; -		places[c].flags = TTM_PL_MASK_CACHING; +		places[c].flags = 0;  		c++;  	} @@ -526,9 +516,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  	struct ttm_operation_ctx ctx = {  		.interruptible = (bp->type != ttm_bo_type_kernel),  		.no_wait_gpu = bp->no_wait_gpu, -		.resv = bp->resv, -		.flags = bp->type != ttm_bo_type_kernel ? 
-			TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 +		/* We opt to avoid OOM on system pages allocations */ +		.gfp_retry_mayfail = true, +		.allow_res_evict = bp->type != ttm_bo_type_kernel, +		.resv = bp->resv  	};  	struct amdgpu_bo *bo;  	unsigned long page_align, size = bp->size; @@ -721,7 +712,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)  	uint32_t domain;  	int r; -	if (bo->pin_count) +	if (bo->tbo.pin_count)  		return 0;  	domain = bo->preferred_domains; @@ -918,13 +909,13 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  	 */  	domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); -	if (bo->pin_count) { +	if (bo->tbo.pin_count) {  		uint32_t mem_type = bo->tbo.mem.mem_type;  		if (!(domain & amdgpu_mem_type_to_domain(mem_type)))  			return -EINVAL; -		bo->pin_count++; +		ttm_bo_pin(&bo->tbo);  		if (max_offset != 0) {  			u64 domain_start = amdgpu_ttm_domain_start(adev, @@ -955,7 +946,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  		if (!bo->placements[i].lpfn ||  		    (lpfn && lpfn < bo->placements[i].lpfn))  			bo->placements[i].lpfn = lpfn; -		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;  	}  	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -964,7 +954,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  		goto error;  	} -	bo->pin_count = 1; +	ttm_bo_pin(&bo->tbo);  	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);  	if (domain == AMDGPU_GEM_DOMAIN_VRAM) { @@ -1006,34 +996,16 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)   * Returns:   * 0 for success or a negative error code on failure.   */ -int amdgpu_bo_unpin(struct amdgpu_bo *bo) +void amdgpu_bo_unpin(struct amdgpu_bo *bo)  { -	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); -	struct ttm_operation_ctx ctx = { false, false }; -	int r, i; - -	if (WARN_ON_ONCE(!bo->pin_count)) { -		dev_warn(adev->dev, "%p unpin not necessary\n", bo); -		return 0; -	} -	bo->pin_count--; -	if (bo->pin_count) -		return 0; +	ttm_bo_unpin(&bo->tbo); +	if (bo->tbo.pin_count) +		return;  	amdgpu_bo_subtract_pin_size(bo);  	if (bo->tbo.base.import_attach)  		dma_buf_unpin(bo->tbo.base.import_attach); - -	for (i = 0; i < bo->placement.num_placement; i++) { -		bo->placements[i].lpfn = 0; -		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; -	} -	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); -	if (unlikely(r)) -		dev_err(adev->dev, "%p validate failed for unpin\n", bo); - -	return r;  }  /** @@ -1048,6 +1020,8 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)   */  int amdgpu_bo_evict_vram(struct amdgpu_device *adev)  { +	struct ttm_resource_manager *man; +  	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */  #ifndef CONFIG_HIBERNATION  	if (adev->flags & AMD_IS_APU) { @@ -1055,7 +1029,9 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)  		return 0;  	}  #endif -	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); + +	man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); +	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);  }  static const char *amdgpu_vram_names[] = { @@ -1069,6 +1045,7 @@ static const char *amdgpu_vram_names[] = {  	"DDR3",  	"DDR4",  	"GDDR6", +	"DDR5"  };  /** @@ -1098,23 +1075,6 @@ int amdgpu_bo_init(struct amdgpu_device *adev)  }  /** - * amdgpu_bo_late_init - late init - * @adev: amdgpu device object - * - * Calls amdgpu_ttm_late_init() to free resources used earlier during - * initialization. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_late_init(struct amdgpu_device *adev) -{ -	amdgpu_ttm_late_init(adev); - -	return 0; -} - -/**   * amdgpu_bo_fini - tear down memory manager   * @adev: amdgpu device object   * @@ -1360,19 +1320,14 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)   * Returns:   * 0 for success or a VM_FAULT_* code on failure.   */ -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);  	struct ttm_operation_ctx ctx = { false, false }; -	struct amdgpu_bo *abo; +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);  	unsigned long offset, size;  	int r; -	if (!amdgpu_bo_is_amdgpu_bo(bo)) -		return 0; - -	abo = ttm_to_amdgpu_bo(bo); -  	/* Remember that this BO was accessed by the CPU */  	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; @@ -1385,8 +1340,8 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  		return 0;  	/* Can't move a pinned BO to visible VRAM */ -	if (abo->pin_count > 0) -		return -EINVAL; +	if (abo->tbo.pin_count > 0) +		return VM_FAULT_SIGBUS;  	/* hurrah, the memory is not visible! */  	atomic64_inc(&adev->num_vram_cpu_page_faults); @@ -1398,15 +1353,18 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  	abo->placement.busy_placement = &abo->placements[1];  	r = ttm_bo_validate(bo, &abo->placement, &ctx); -	if (unlikely(r != 0)) -		return r; +	if (unlikely(r == -EBUSY || r == -ERESTARTSYS)) +		return VM_FAULT_NOPAGE; +	else if (unlikely(r)) +		return VM_FAULT_SIGBUS;  	offset = bo->mem.start << PAGE_SHIFT;  	/* this should never happen */  	if (bo->mem.mem_type == TTM_PL_VRAM &&  	    (offset + size) > adev->gmc.visible_vram_size) -		return -EINVAL; +		return VM_FAULT_SIGBUS; +	ttm_bo_move_to_lru_tail_unlocked(bo);  	return 0;  } @@ -1489,7 +1447,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)  {  	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);  	WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && -		     !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); +		     !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel);  	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);  	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&  		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); @@ -1533,3 +1491,77 @@ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,  	}  	return domain;  } + +#if defined(CONFIG_DEBUG_FS) +#define amdgpu_bo_print_flag(m, bo, flag)		        \ +	do {							\ +		if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) {	\ +			seq_printf((m), " " #flag);		\ +		}						\ +	} while (0) + +/** + * amdgpu_bo_print_info - print BO info in debugfs file + * + * @id: Index or Id of the BO + * @bo: Requested BO for printing info + * @m: debugfs file + * + * Print BO information in debugfs file + * + * Returns: + * Size of the BO in bytes. 
+ */ +u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) +{ +	struct dma_buf_attachment *attachment; +	struct dma_buf *dma_buf; +	unsigned int domain; +	const char *placement; +	unsigned int pin_count; +	u64 size; + +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); +	switch (domain) { +	case AMDGPU_GEM_DOMAIN_VRAM: +		placement = "VRAM"; +		break; +	case AMDGPU_GEM_DOMAIN_GTT: +		placement = " GTT"; +		break; +	case AMDGPU_GEM_DOMAIN_CPU: +	default: +		placement = " CPU"; +		break; +	} + +	size = amdgpu_bo_size(bo); +	seq_printf(m, "\t\t0x%08x: %12lld byte %s", +			id, size, placement); + +	pin_count = READ_ONCE(bo->tbo.pin_count); +	if (pin_count) +		seq_printf(m, " pin count %d", pin_count); + +	dma_buf = READ_ONCE(bo->tbo.base.dma_buf); +	attachment = READ_ONCE(bo->tbo.base.import_attach); + +	if (attachment) +		seq_printf(m, " imported from %p", dma_buf); +	else if (dma_buf) +		seq_printf(m, " exported as %p", dma_buf); + +	amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); +	amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); +	amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC); +	amdgpu_bo_print_flag(m, bo, VRAM_CLEARED); +	amdgpu_bo_print_flag(m, bo, SHADOW); +	amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); +	amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); +	amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); + +	seq_puts(m, "\n"); + +	return size; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5ddb6cf96030..79120ec41396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -89,7 +89,6 @@ struct amdgpu_bo {  	struct ttm_buffer_object	tbo;  	struct ttm_bo_kmap_obj		kmap;  	u64				flags; -	unsigned			pin_count;  	u64				tiling_flags;  	u64				metadata_flags;  	void				*metadata; @@ -101,7 +100,6 @@ struct amdgpu_bo {  	struct amdgpu_bo		*parent;  	struct amdgpu_bo		*shadow; -	struct ttm_bo_kmap_obj		dma_buf_vmap;  	struct amdgpu_mn		*mn; @@ -267,10 +265,9 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo);  int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  			     u64 min_offset, u64 max_offset); -int amdgpu_bo_unpin(struct amdgpu_bo *bo); +void amdgpu_bo_unpin(struct amdgpu_bo *bo);  int amdgpu_bo_evict_vram(struct amdgpu_device *adev);  int amdgpu_bo_init(struct amdgpu_device *adev); -int amdgpu_bo_late_init(struct amdgpu_device *adev);  void amdgpu_bo_fini(struct amdgpu_device *adev);  int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,  				struct vm_area_struct *vma); @@ -285,7 +282,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  			   bool evict,  			   struct ttm_resource *new_mem);  void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);  void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,  		     bool shared);  int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv, @@ -330,6 +327,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev,  #if defined(CONFIG_DEBUG_FS)  void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,  					 struct seq_file *m); +u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m);  #endif  int amdgpu_debugfs_sa_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 1f2305b7bd13..f2e20666c9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -102,11 +102,12 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_   * amdgpu_pll_compute - compute PLL parameters   *   * @pll: information about the PLL + * @freq: requested frequency   * @dot_clock_p: resulting pixel clock - * fb_div_p: resulting feedback divider - * frac_fb_div_p: fractional part of the feedback divider - * ref_div_p: resulting reference divider - * post_div_p: resulting reference divider + * @fb_div_p: resulting feedback divider + * @frac_fb_div_p: fractional part of the feedback divider + * @ref_div_p: resulting reference divider + * @post_div_p: resulting post divider   *   * Try to calculate the PLL parameters to generate the given frequency:   * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) @@ -308,7 +309,6 @@ int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc)   * amdgpu_pll_get_shared_nondp_ppll - return the PPLL used by another non-DP crtc   *   * @crtc: drm crtc - * @encoder: drm encoder   *   * Returns the PPLL (Pixel PLL) used by another non-DP crtc/encoder which can   * be shared (i.e., same clock). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 69af462db34d..19c0a3655228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -19,17 +19,29 @@   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR   * OTHER DEALINGS IN THE SOFTWARE.   * - * Author: Jonathan Kim <jonathan.kim@amd.com> - *   */  #include <linux/perf_event.h>  #include <linux/init.h>  #include "amdgpu.h"  #include "amdgpu_pmu.h" -#include "df_v3_6.h"  #define PMU_NAME_SIZE 32 +#define NUM_FORMATS_AMDGPU_PMU		4 +#define NUM_FORMATS_DF_VEGA20		3 +#define NUM_EVENTS_DF_VEGA20		8 +#define NUM_EVENT_TYPES_VEGA20		1 +#define NUM_EVENTS_VEGA20_XGMI		2 +#define NUM_EVENTS_VEGA20_MAX		NUM_EVENTS_VEGA20_XGMI +#define NUM_EVENT_TYPES_ARCTURUS	1 +#define NUM_EVENTS_ARCTURUS_XGMI	6 +#define NUM_EVENTS_ARCTURUS_MAX		NUM_EVENTS_ARCTURUS_XGMI + +struct amdgpu_pmu_event_attribute { +	struct device_attribute attr; +	const char *event_str; +	unsigned int type; +};  /* record to keep track of pmu entry per pmu type per device */  struct amdgpu_pmu_entry { @@ -37,11 +49,162 @@ struct amdgpu_pmu_entry {  	struct amdgpu_device *adev;  	struct pmu pmu;  	unsigned int pmu_perf_type; +	char *pmu_type_name; +	char *pmu_file_prefix; +	struct attribute_group fmt_attr_group; +	struct amdgpu_pmu_event_attribute *fmt_attr; +	struct attribute_group evt_attr_group; +	struct amdgpu_pmu_event_attribute *evt_attr;  }; +static ssize_t amdgpu_pmu_event_show(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	struct amdgpu_pmu_event_attribute *amdgpu_pmu_attr; + +	amdgpu_pmu_attr = container_of(attr, struct amdgpu_pmu_event_attribute, +									attr); + +	if (!amdgpu_pmu_attr->type) +		return sprintf(buf, "%s\n", amdgpu_pmu_attr->event_str); + +	return sprintf(buf, "%s,type=0x%x\n", +			amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type); +} +  static LIST_HEAD(amdgpu_pmu_list); +struct amdgpu_pmu_attr { +	const char *name; +	const char *config; +}; + +struct amdgpu_pmu_type { +	const unsigned int type; +	const unsigned int num_of_type; +}; + +struct amdgpu_pmu_config { +	struct amdgpu_pmu_attr *formats; +	unsigned int num_formats; +	struct amdgpu_pmu_attr *events; +	
unsigned int num_events; +	struct amdgpu_pmu_type *types; +	unsigned int num_types; +}; + +/* + * Events fall under two categories: + *  - PMU typed + *    Events in /sys/bus/event_source/devices/amdgpu_<pmu_type>_<dev_num> have + *    performance counter operations handled by one IP <pmu_type>.  Formats and + *    events should be defined by <pmu_type>_<asic_type>_formats and + *    <pmu_type>_<asic_type>_events respectively. + * + *  - Event config typed + *    Events in /sys/bus/event_source/devices/amdgpu_<dev_num> have performance + *    counter operations that can be handled by multiple IPs dictated by their + *    "type" format field.  Formats and events should be defined by + *    amdgpu_pmu_formats and <asic_type>_events respectively.  Format field + *    "type" is generated in amdgpu_pmu_event_show and defined in + *    <asic_type>_event_config_types. + */ + +static struct amdgpu_pmu_attr amdgpu_pmu_formats[NUM_FORMATS_AMDGPU_PMU] = { +	{ .name = "event", .config = "config:0-7" }, +	{ .name = "instance", .config = "config:8-15" }, +	{ .name = "umask", .config = "config:16-23"}, +	{ .name = "type", .config = "config:56-63"} +}; + +/* Vega20 events */ +static struct amdgpu_pmu_attr vega20_events[NUM_EVENTS_VEGA20_MAX] = { +	{ .name = "xgmi_link0_data_outbound", +			.config = "event=0x7,instance=0x46,umask=0x2" }, +	{ .name = "xgmi_link1_data_outbound", +			.config = "event=0x7,instance=0x47,umask=0x2" } +}; + +static struct amdgpu_pmu_type vega20_types[NUM_EVENT_TYPES_VEGA20] = { +	{ .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, +					.num_of_type = NUM_EVENTS_VEGA20_XGMI } +}; + +static struct amdgpu_pmu_config vega20_config = { +	.formats = amdgpu_pmu_formats, +	.num_formats = ARRAY_SIZE(amdgpu_pmu_formats), +	.events = vega20_events, +	.num_events = ARRAY_SIZE(vega20_events), +	.types = vega20_types, +	.num_types = ARRAY_SIZE(vega20_types) +}; + +/* Vega20 data fabric (DF) events */ +static struct amdgpu_pmu_attr df_vega20_formats[NUM_FORMATS_DF_VEGA20] = { +	{ .name = "event", .config = "config:0-7" }, +	{ .name = "instance", .config = "config:8-15" }, +	{ .name = "umask", .config = "config:16-23"} +}; + +static struct amdgpu_pmu_attr df_vega20_events[NUM_EVENTS_DF_VEGA20] = { +	{ .name = "cake0_pcsout_txdata", +			.config = "event=0x7,instance=0x46,umask=0x2" }, +	{ .name = "cake1_pcsout_txdata", +			.config = "event=0x7,instance=0x47,umask=0x2" }, +	{ .name = "cake0_pcsout_txmeta", +			.config = "event=0x7,instance=0x46,umask=0x4" }, +	{ .name = "cake1_pcsout_txmeta", +			.config = "event=0x7,instance=0x47,umask=0x4" }, +	{ .name = "cake0_ftiinstat_reqalloc", +			.config = "event=0xb,instance=0x46,umask=0x4" }, +	{ .name = "cake1_ftiinstat_reqalloc", +			.config = "event=0xb,instance=0x47,umask=0x4" }, +	{ .name = "cake0_ftiinstat_rspalloc", +			.config = "event=0xb,instance=0x46,umask=0x8" }, +	{ .name = "cake1_ftiinstat_rspalloc", +			.config = "event=0xb,instance=0x47,umask=0x8" } +}; + +static struct amdgpu_pmu_config df_vega20_config = { +	.formats = df_vega20_formats, +	.num_formats = ARRAY_SIZE(df_vega20_formats), +	.events = df_vega20_events, +	.num_events = ARRAY_SIZE(df_vega20_events), +	.types = NULL, +	.num_types = 0 +}; + +/* Arcturus events */ +static struct amdgpu_pmu_attr arcturus_events[NUM_EVENTS_ARCTURUS_MAX] = { +	{ .name = "xgmi_link0_data_outbound", +			.config = "event=0x7,instance=0x4b,umask=0x2" }, +	{ .name = "xgmi_link1_data_outbound", +			.config = "event=0x7,instance=0x4c,umask=0x2" }, +	{ .name = "xgmi_link2_data_outbound", +			.config = 
"event=0x7,instance=0x4d,umask=0x2" }, +	{ .name = "xgmi_link3_data_outbound", +			.config = "event=0x7,instance=0x4e,umask=0x2" }, +	{ .name = "xgmi_link4_data_outbound", +			.config = "event=0x7,instance=0x4f,umask=0x2" }, +	{ .name = "xgmi_link5_data_outbound", +			.config = "event=0x7,instance=0x50,umask=0x2" } +}; + +static struct amdgpu_pmu_type arcturus_types[NUM_EVENT_TYPES_ARCTURUS] = { +	{ .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, +				.num_of_type = NUM_EVENTS_ARCTURUS_XGMI } +}; + +static struct amdgpu_pmu_config arcturus_config = { +	.formats = amdgpu_pmu_formats, +	.num_formats = ARRAY_SIZE(amdgpu_pmu_formats), +	.events = arcturus_events, +	.num_events = ARRAY_SIZE(arcturus_events), +	.types = arcturus_types, +	.num_types = ARRAY_SIZE(arcturus_types) +}; +  /* initialize perf counter */  static int amdgpu_perf_event_init(struct perf_event *event)  { @@ -53,6 +216,7 @@ static int amdgpu_perf_event_init(struct perf_event *event)  	/* update the hw_perf_event struct with config data */  	hwc->config = event->attr.config; +	hwc->config_base = AMDGPU_PMU_PERF_TYPE_NONE;  	return 0;  } @@ -64,6 +228,7 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)  	struct amdgpu_pmu_entry *pe = container_of(event->pmu,  						  struct amdgpu_pmu_entry,  						  pmu); +	int target_cntr = 0;  	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))  		return; @@ -71,19 +236,27 @@ static void amdgpu_perf_start(struct perf_event *event, int flags)  	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));  	hwc->state = 0; -	switch (pe->pmu_perf_type) { -	case PERF_TYPE_AMDGPU_DF: -		if (!(flags & PERF_EF_RELOAD)) -			pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1); +	switch (hwc->config_base) { +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: +		if (!(flags & PERF_EF_RELOAD)) { +			target_cntr = pe->adev->df.funcs->pmc_start(pe->adev, +						hwc->config, 0 /* unused */, +						1 /* add counter */); +			if (target_cntr < 0) +				break; + +			hwc->idx = target_cntr; +		} -		pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0); +		pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, +								hwc->idx, 0);  		break;  	default:  		break;  	}  	perf_event_update_userpage(event); -  }  /* read perf counter */ @@ -93,16 +266,16 @@ static void amdgpu_perf_read(struct perf_event *event)  	struct amdgpu_pmu_entry *pe = container_of(event->pmu,  						  struct amdgpu_pmu_entry,  						  pmu); -  	u64 count, prev;  	do {  		prev = local64_read(&hwc->prev_count); -		switch (pe->pmu_perf_type) { -		case PERF_TYPE_AMDGPU_DF: -			pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config, -							  &count); +		switch (hwc->config_base) { +		case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: +		case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: +			pe->adev->df.funcs->pmc_get_count(pe->adev, +						hwc->config, hwc->idx, &count);  			break;  		default:  			count = 0; @@ -124,9 +297,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)  	if (hwc->state & PERF_HES_UPTODATE)  		return; -	switch (pe->pmu_perf_type) { -	case PERF_TYPE_AMDGPU_DF: -		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0); +	switch (hwc->config_base) { +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: +		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx, +									0);  		break;  	default:  		break; @@ -142,22 +317,39 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags)  	hwc->state |= PERF_HES_UPTODATE;  } -/* add perf counter  */ +/* 
add perf counter */  static int amdgpu_perf_add(struct perf_event *event, int flags)  {  	struct hw_perf_event *hwc = &event->hw; -	int retval; - +	int retval = 0, target_cntr;  	struct amdgpu_pmu_entry *pe = container_of(event->pmu,  						  struct amdgpu_pmu_entry,  						  pmu); +	switch (pe->pmu_perf_type) { +	case AMDGPU_PMU_PERF_TYPE_DF: +		hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF; +		break; +	case AMDGPU_PMU_PERF_TYPE_ALL: +		hwc->config_base = (hwc->config >> +					AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT) & +					AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK; +		break; +	} +  	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; -	switch (pe->pmu_perf_type) { -	case PERF_TYPE_AMDGPU_DF: -		retval = pe->adev->df.funcs->pmc_start(pe->adev, -						       hwc->config, 1); +	switch (hwc->config_base) { +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: +		target_cntr = pe->adev->df.funcs->pmc_start(pe->adev, +						hwc->config, 0 /* unused */, +						1 /* add counter */); +		if (target_cntr < 0) +			retval = target_cntr; +		else +			hwc->idx = target_cntr; +  		break;  	default:  		return 0; @@ -170,7 +362,6 @@ static int amdgpu_perf_add(struct perf_event *event, int flags)  		amdgpu_perf_start(event, PERF_EF_RELOAD);  	return retval; -  }  /* delete perf counter  */ @@ -183,9 +374,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)  	amdgpu_perf_stop(event, PERF_EF_UPDATE); -	switch (pe->pmu_perf_type) { -	case PERF_TYPE_AMDGPU_DF: -		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1); +	switch (hwc->config_base) { +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: +	case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: +		pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx, +									1);  		break;  	default:  		break; @@ -194,25 +387,92 @@ static void amdgpu_perf_del(struct perf_event *event, int flags)  	perf_event_update_userpage(event);  } -/* vega20 pmus */ +static void amdgpu_pmu_create_event_attrs_by_type( +				struct attribute_group *attr_group, +				struct amdgpu_pmu_event_attribute *pmu_attr, +				struct amdgpu_pmu_attr events[], +				int s_offset, +				int e_offset, +				unsigned int type) +{ +	int i; + +	pmu_attr += s_offset; + +	for (i = s_offset; i < e_offset; i++) { +		attr_group->attrs[i] = &pmu_attr->attr.attr; +		sysfs_attr_init(&pmu_attr->attr.attr); +		pmu_attr->attr.attr.name = events[i].name; +		pmu_attr->attr.attr.mode = 0444; +		pmu_attr->attr.show = amdgpu_pmu_event_show; +		pmu_attr->event_str = events[i].config; +		pmu_attr->type = type; +		pmu_attr++; +	} +} -/* init pmu tracking per pmu type */ -static int init_pmu_by_type(struct amdgpu_device *adev, -		  const struct attribute_group *attr_groups[], -		  char *pmu_type_name, char *pmu_file_prefix, -		  unsigned int pmu_perf_type, -		  unsigned int num_counters) +static void amdgpu_pmu_create_attrs(struct attribute_group *attr_group, +				struct amdgpu_pmu_event_attribute *pmu_attr, +				struct amdgpu_pmu_attr events[], +				int num_events)  { -	char pmu_name[PMU_NAME_SIZE]; -	struct amdgpu_pmu_entry *pmu_entry; -	int ret = 0; +	amdgpu_pmu_create_event_attrs_by_type(attr_group, pmu_attr, events, 0, +				num_events, AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE); +} -	pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); -	if (!pmu_entry) +static int amdgpu_pmu_alloc_pmu_attrs( +				struct attribute_group *fmt_attr_group, +				struct amdgpu_pmu_event_attribute **fmt_attr, +				struct attribute_group *evt_attr_group, +				struct amdgpu_pmu_event_attribute **evt_attr, +				
struct amdgpu_pmu_config *config) +{ +	*fmt_attr = kcalloc(config->num_formats, sizeof(**fmt_attr), +								GFP_KERNEL); + +	if (!(*fmt_attr))  		return -ENOMEM; -	pmu_entry->adev = adev; +	fmt_attr_group->attrs = kcalloc(config->num_formats + 1, +				sizeof(*fmt_attr_group->attrs), GFP_KERNEL); + +	if (!fmt_attr_group->attrs) +		goto err_fmt_attr_grp; + +	*evt_attr = kcalloc(config->num_events, sizeof(**evt_attr), GFP_KERNEL); + +	if (!(*evt_attr)) +		goto err_evt_attr; + +	evt_attr_group->attrs = kcalloc(config->num_events + 1, +				sizeof(*evt_attr_group->attrs), GFP_KERNEL); + +	if (!evt_attr_group->attrs) +		goto err_evt_attr_grp; + +	return 0; +err_evt_attr_grp: +	kfree(*evt_attr); +err_evt_attr: +	kfree(fmt_attr_group->attrs); +err_fmt_attr_grp: +	kfree(*fmt_attr); +	return -ENOMEM; +} + +/* init pmu tracking per pmu type */ +static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry, +			struct amdgpu_pmu_config *config) +{ +	const struct attribute_group *attr_groups[] = { +		&pmu_entry->fmt_attr_group, +		&pmu_entry->evt_attr_group, +		NULL +	}; +	char pmu_name[PMU_NAME_SIZE]; +	int ret = 0, total_num_events = 0; +  	pmu_entry->pmu = (struct pmu){  		.event_init = amdgpu_perf_event_init,  		.add = amdgpu_perf_add, @@ -223,59 +483,178 @@ static int init_pmu_by_type(struct amdgpu_device *adev,  		.task_ctx_nr = perf_invalid_context,  	}; -	pmu_entry->pmu.attr_groups = attr_groups; -	pmu_entry->pmu_perf_type = pmu_perf_type; -	snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", -				pmu_file_prefix, adev_to_drm(adev)->primary->index); +	ret = amdgpu_pmu_alloc_pmu_attrs(&pmu_entry->fmt_attr_group, +					&pmu_entry->fmt_attr, +					&pmu_entry->evt_attr_group, +					&pmu_entry->evt_attr, +					config); + +	if (ret) +		goto err_out; + +	amdgpu_pmu_create_attrs(&pmu_entry->fmt_attr_group, pmu_entry->fmt_attr, +					config->formats, config->num_formats); + +	if (pmu_entry->pmu_perf_type == AMDGPU_PMU_PERF_TYPE_ALL) { +		int i; + +		for (i = 0; i < config->num_types; i++) { +			amdgpu_pmu_create_event_attrs_by_type( +					&pmu_entry->evt_attr_group, +					pmu_entry->evt_attr, +					config->events, +					total_num_events, +					total_num_events + +						config->types[i].num_of_type, +					config->types[i].type); +			total_num_events += config->types[i].num_of_type; +		} +	} else { +		amdgpu_pmu_create_attrs(&pmu_entry->evt_attr_group, +					pmu_entry->evt_attr, +					config->events, config->num_events); +		total_num_events = config->num_events; +	} + +	pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), +								GFP_KERNEL); + +	if (!pmu_entry->pmu.attr_groups) +		goto err_attr_group; + +	snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix, +				adev_to_drm(pmu_entry->adev)->primary->index);  	ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1); -	if (ret) { -		kfree(pmu_entry); -		pr_warn("Error initializing AMDGPU %s PMUs.\n", pmu_type_name); -		return ret; -	} +	if (ret) +		goto err_register; + +	if (pmu_entry->pmu_perf_type != AMDGPU_PMU_PERF_TYPE_ALL) +		pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n", +				pmu_entry->pmu_type_name, total_num_events); +	else +		pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events); -	pr_info("Detected AMDGPU %s Counters. 
# of Counters = %d.\n", -			pmu_type_name, num_counters);  	list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);  	return 0; +err_register: +	kfree(pmu_entry->pmu.attr_groups); +err_attr_group: +	kfree(pmu_entry->fmt_attr_group.attrs); +	kfree(pmu_entry->fmt_attr); +	kfree(pmu_entry->evt_attr_group.attrs); +	kfree(pmu_entry->evt_attr); +err_out: +	pr_warn("Error initializing AMDGPU %s PMUs.\n", +						pmu_entry->pmu_type_name); +	return ret; +} + +/* destroy all pmu data associated with target device */ +void amdgpu_pmu_fini(struct amdgpu_device *adev) +{ +	struct amdgpu_pmu_entry *pe, *temp; + +	list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { +		if (pe->adev != adev) +			continue; +		list_del(&pe->entry); +		perf_pmu_unregister(&pe->pmu); +		kfree(pe->pmu.attr_groups); +		kfree(pe->fmt_attr_group.attrs); +		kfree(pe->fmt_attr); +		kfree(pe->evt_attr_group.attrs); +		kfree(pe->evt_attr); +		kfree(pe); +	} +} + +static struct amdgpu_pmu_entry *create_pmu_entry(struct amdgpu_device *adev, +						unsigned int pmu_type, +						char *pmu_type_name, +						char *pmu_file_prefix) +{ +	struct amdgpu_pmu_entry *pmu_entry; + +	pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); + +	if (!pmu_entry) +		return pmu_entry; + +	pmu_entry->adev = adev; +	pmu_entry->fmt_attr_group.name = "format"; +	pmu_entry->fmt_attr_group.attrs = NULL; +	pmu_entry->evt_attr_group.name = "events"; +	pmu_entry->evt_attr_group.attrs = NULL; +	pmu_entry->pmu_perf_type = pmu_type; +	pmu_entry->pmu_type_name = pmu_type_name; +	pmu_entry->pmu_file_prefix = pmu_file_prefix; + +	return pmu_entry;  }  /* init amdgpu_pmu */  int amdgpu_pmu_init(struct amdgpu_device *adev)  {  	int ret = 0; +	struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;  	switch (adev->asic_type) {  	case CHIP_VEGA20: -		/* init df */ -		ret = init_pmu_by_type(adev, df_v3_6_attr_groups, -				       "DF", "amdgpu_df", PERF_TYPE_AMDGPU_DF, -				       DF_V3_6_MAX_COUNTERS); +		pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF, +						"DF", "amdgpu_df"); -		/* other pmu types go here*/ -		break; -	default: -		return 0; -	} +		if (!pmu_entry_df) +			return -ENOMEM; -	return 0; -} +		ret = init_pmu_entry_by_type_and_add(pmu_entry_df, +							&df_vega20_config); +		if (ret) { +			kfree(pmu_entry_df); +			return ret; +		} -/* destroy all pmu data associated with target device */ -void amdgpu_pmu_fini(struct amdgpu_device *adev) -{ -	struct amdgpu_pmu_entry *pe, *temp; +		pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL, +						"", "amdgpu"); -	list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { -		if (pe->adev == adev) { -			list_del(&pe->entry); -			perf_pmu_unregister(&pe->pmu); -			kfree(pe); +		if (!pmu_entry) { +			amdgpu_pmu_fini(adev); +			return -ENOMEM; +		} + +		ret = init_pmu_entry_by_type_and_add(pmu_entry, +							&vega20_config); + +		if (ret) { +			kfree(pmu_entry); +			amdgpu_pmu_fini(adev); +			return ret; +		} + +		break; +	case CHIP_ARCTURUS: +		pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL, +						"", "amdgpu"); +		if (!pmu_entry) +			return -ENOMEM; + +		ret = init_pmu_entry_by_type_and_add(pmu_entry, +							&arcturus_config); + +		if (ret) { +			kfree(pmu_entry); +			return -ENOMEM;  		} + +		break; + +	default: +		return 0;  	} + +	return ret;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h index 7dddb7160a11..6882dc48c5d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h 
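The reworked PMU registration above dispatches counters on an event config type packed into the top byte of the 64-bit raw perf config; the amdgpu_pmu.h hunk that follows adds the corresponding shift/mask and type enum. A minimal userspace-style sketch of that packing, and of the unpacking amdgpu_perf_add() performs — the constants mirror the header below and the field values come from the vega20 xgmi_link0_data_outbound entry above; everything else is illustrative, not driver code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EVENT_CONFIG_TYPE_SHIFT	56	/* AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT */
#define EVENT_CONFIG_TYPE_MASK	0xffULL	/* AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK */
#define EVENT_CONFIG_TYPE_XGMI	2ULL	/* AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI */

int main(void)
{
	/* event=0x7,instance=0x46,umask=0x2, per the format fields
	 * config:0-7, config:8-15 and config:16-23 in amdgpu_pmu_formats */
	uint64_t config = 0x7ULL | (0x46ULL << 8) | (0x2ULL << 16);

	/* sysfs appends ",type=0x2" for xgmi events (amdgpu_pmu_event_show);
	 * perf packs that field into config:56-63 */
	config |= EVENT_CONFIG_TYPE_XGMI << EVENT_CONFIG_TYPE_SHIFT;

	/* what amdgpu_perf_add() recovers into hwc->config_base for an
	 * AMDGPU_PMU_PERF_TYPE_ALL pmu */
	uint64_t config_base = (config >> EVENT_CONFIG_TYPE_SHIFT) &
			       EVENT_CONFIG_TYPE_MASK;

	printf("config=0x%016" PRIx64 " config_base=%" PRIu64 "\n",
	       config, config_base);
	return 0;
}

With the event registered, the counter can then be opened from userspace with something like perf stat -e amdgpu_<N>/xgmi_link0_data_outbound/, where <N> is the DRM primary index used in the snprintf above.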
@@ -19,18 +19,38 @@   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR   * OTHER DEALINGS IN THE SOFTWARE.   * - * Author: Jonathan Kim <jonathan.kim@amd.com> - *   */  #ifndef _AMDGPU_PMU_H_  #define _AMDGPU_PMU_H_ +/* PMU types. */  enum amdgpu_pmu_perf_type { -	PERF_TYPE_AMDGPU_DF = 0, -	PERF_TYPE_AMDGPU_MAX +	AMDGPU_PMU_PERF_TYPE_NONE = 0, +	AMDGPU_PMU_PERF_TYPE_DF, +	AMDGPU_PMU_PERF_TYPE_ALL  }; +/* + * PMU type AMDGPU_PMU_PERF_TYPE_ALL can hold events of different "type" + * configurations.  Event config types are parsed from the 64-bit raw + * config (see EVENT_CONFIG_TYPE_SHIFT and EVENT_CONFIG_TYPE_MASK) and + * are registered into the HW perf event's config_base. + * + * PMU types with only a single event configuration type + * (non-AMDGPU_PMU_PERF_TYPE_ALL) have their event config type auto-generated + * when the performance counter is added. + */ +enum amdgpu_pmu_event_config_type { +	AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE = 0, +	AMDGPU_PMU_EVENT_CONFIG_TYPE_DF, +	AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, +	AMDGPU_PMU_EVENT_CONFIG_TYPE_MAX +}; + +#define AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT	56 +#define AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK	0xff +  int amdgpu_pmu_init(struct amdgpu_device *adev);  void amdgpu_pmu_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 96a9699f87ba..523d22db094b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -100,6 +100,8 @@ static int psp_early_init(void *handle)  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		psp_v11_0_set_psp_funcs(psp);  		psp->autoload_supported = true;  		break; @@ -288,6 +290,8 @@ psp_cmd_submit_buf(struct psp_context *psp,  	skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED ||  		psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev); +	memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp)); +  	/* In some cases, psp response status is not 0 even if there is no  	 * problem while the command is submitted. Some version of PSP FW  	 * doesn't write 0 to that field. 
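The memcpy added above hands the entire PSP response back in the caller's command buffer (which is why the dedicated xGMI session-id copy is dropped in the next hunk), so new queries can read any response field. The FW attestation query introduced further down, for instance, assembles a 64-bit records address from the lo/hi words of the response. A hedged sketch of that composition — the parameter names mirror the fwar_db_info response fields, while the helper itself is invented for illustration:

#include <stdint.h>

/* compose the 64-bit FW attestation records address from the lo/hi
 * words of the PSP response, as psp_get_fw_attestation_records_addr()
 * does below */
static inline uint64_t fwar_db_addr(uint32_t fwar_db_addr_lo,
				    uint32_t fwar_db_addr_hi)
{
	return (uint64_t)fwar_db_addr_lo |
	       ((uint64_t)fwar_db_addr_hi << 32);
}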
@@ -308,9 +312,6 @@ psp_cmd_submit_buf(struct psp_context *psp,  		}  	} -	/* get xGMI session id from response buffer */ -	cmd->resp.session_id = psp->cmd_buf_mem->resp.session_id; -  	if (ucode) {  		ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;  		ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; @@ -509,6 +510,37 @@ static int psp_tmr_terminate(struct psp_context *psp)  	return 0;  } +int psp_get_fw_attestation_records_addr(struct psp_context *psp, +					uint64_t *output_ptr) +{ +	int ret; +	struct psp_gfx_cmd_resp *cmd; + +	if (!output_ptr) +		return -EINVAL; + +	if (amdgpu_sriov_vf(psp->adev)) +		return 0; + +	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); +	if (!cmd) +		return -ENOMEM; + +	cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION; + +	ret = psp_cmd_submit_buf(psp, NULL, cmd, +				 psp->fence_buf_mc_addr); + +	if (!ret) { +		*output_ptr = ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_lo) + +			      ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32); +	} + +	kfree(cmd); + +	return ret; +} +  static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,  				uint64_t asd_mc, uint32_t size)  { @@ -624,14 +656,14 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,  				     uint64_t ta_shared_mc,  				     uint32_t ta_shared_size)  { -	cmd->cmd_id 				= GFX_CMD_ID_LOAD_TA; +	cmd->cmd_id				= GFX_CMD_ID_LOAD_TA;  	cmd->cmd.cmd_load_ta.app_phy_addr_lo 	= lower_32_bits(ta_bin_mc); -	cmd->cmd.cmd_load_ta.app_phy_addr_hi 	= upper_32_bits(ta_bin_mc); -	cmd->cmd.cmd_load_ta.app_len 		= ta_bin_size; +	cmd->cmd.cmd_load_ta.app_phy_addr_hi	= upper_32_bits(ta_bin_mc); +	cmd->cmd.cmd_load_ta.app_len		= ta_bin_size;  	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc);  	cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc); -	cmd->cmd.cmd_load_ta.cmd_buf_len 	 = ta_shared_size; +	cmd->cmd.cmd_load_ta.cmd_buf_len	 = ta_shared_size;  }  static int psp_xgmi_init_shared_buf(struct psp_context *psp) @@ -655,9 +687,9 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,  				       uint32_t ta_cmd_id,  				       uint32_t session_id)  { -	cmd->cmd_id 				= GFX_CMD_ID_INVOKE_CMD; -	cmd->cmd.cmd_invoke_cmd.session_id 	= session_id; -	cmd->cmd.cmd_invoke_cmd.ta_cmd_id 	= ta_cmd_id; +	cmd->cmd_id				= GFX_CMD_ID_INVOKE_CMD; +	cmd->cmd.cmd_invoke_cmd.session_id	= session_id; +	cmd->cmd.cmd_invoke_cmd.ta_cmd_id	= ta_cmd_id;  }  static int psp_ta_invoke(struct psp_context *psp, @@ -806,7 +838,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)  	struct ta_xgmi_shared_memory *xgmi_cmd;  	int ret; -	xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; +	xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;  	memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));  	xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; @@ -826,7 +858,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)  	struct ta_xgmi_shared_memory *xgmi_cmd;  	int ret; -	xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; +	xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;  	memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));  	xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; @@ -854,7 +886,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,  	if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)  		return -EINVAL; -	xgmi_cmd = (struct 
ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; +	xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;  	memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));  	/* Fill in the shared memory with topology information as input */ @@ -898,7 +930,7 @@ int psp_xgmi_set_topology_info(struct psp_context *psp,  	if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES)  		return -EINVAL; -	xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; +	xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf;  	memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory));  	topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -962,7 +994,7 @@ static int psp_ras_load(struct psp_context *psp)  	ret = psp_cmd_submit_buf(psp, NULL, cmd,  			psp->fence_buf_mc_addr); -	ras_cmd = (struct ta_ras_shared_memory*)psp->ras.ras_shared_buf; +	ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;  	if (!ret) {  		psp->ras.session_id = cmd->resp.session_id; @@ -1884,7 +1916,7 @@ static int psp_execute_np_fw_load(struct psp_context *psp,  static int psp_load_smu_fw(struct psp_context *psp)  {  	int ret; -	struct amdgpu_device* adev = psp->adev; +	struct amdgpu_device *adev = psp->adev;  	struct amdgpu_firmware_info *ucode =  			&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];  	struct amdgpu_ras *ras = psp->ras.ras; @@ -1893,7 +1925,8 @@ static int psp_load_smu_fw(struct psp_context *psp)  		return 0; -	if (amdgpu_in_reset(adev) && ras && ras->supported) { +	if (amdgpu_in_reset(adev) && ras && ras->supported && +		adev->asic_type == CHIP_ARCTURUS) {  		ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);  		if (ret) {  			DRM_WARN("Failed to set MP1 state prepare for reload\n"); @@ -1950,7 +1983,7 @@ static int psp_np_fw_load(struct psp_context *psp)  {  	int i, ret;  	struct amdgpu_firmware_info *ucode; -	struct amdgpu_device* adev = psp->adev; +	struct amdgpu_device *adev = psp->adev;  	if (psp->autoload_supported &&  	    !psp->pmfw_centralized_cstate_management) { @@ -1974,8 +2007,8 @@ static int psp_np_fw_load(struct psp_context *psp)  			continue;  		if (psp->autoload_supported && -		    (adev->asic_type == CHIP_SIENNA_CICHLID || -		     adev->asic_type == CHIP_NAVY_FLOUNDER) && +		    (adev->asic_type >= CHIP_SIENNA_CICHLID && +		     adev->asic_type <= CHIP_DIMGREY_CAVEFISH) &&  		    (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 ||  		     ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 ||  		     ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) @@ -2390,7 +2423,7 @@ int psp_init_asd_microcode(struct psp_context *psp,  			   const char *chip_name)  {  	struct amdgpu_device *adev = psp->adev; -	char fw_name[30]; +	char fw_name[PSP_FW_NAME_LEN];  	const struct psp_firmware_header_v1_0 *asd_hdr;  	int err = 0; @@ -2422,11 +2455,47 @@ out:  	return err;  } -int psp_init_sos_microcode(struct psp_context *psp, +int psp_init_toc_microcode(struct psp_context *psp,  			   const char *chip_name)  {  	struct amdgpu_device *adev = psp->adev;  	char fw_name[30]; +	const struct psp_firmware_header_v1_0 *toc_hdr; +	int err = 0; + +	if (!chip_name) { +		dev_err(adev->dev, "invalid chip name for toc microcode\n"); +		return -EINVAL; +	} + +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name); +	err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev); +	if (err) +		goto out; + +	err = amdgpu_ucode_validate(adev->psp.toc_fw); +	if (err) +		goto out; + +	toc_hdr = (const struct psp_firmware_header_v1_0 
*)adev->psp.toc_fw->data; +	adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version); +	adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->ucode_feature_version); +	adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes); +	adev->psp.toc_start_addr = (uint8_t *)toc_hdr + +				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); +	return 0; +out: +	dev_err(adev->dev, "fail to request/validate toc microcode\n"); +	release_firmware(adev->psp.toc_fw); +	adev->psp.toc_fw = NULL; +	return err; +} + +int psp_init_sos_microcode(struct psp_context *psp, +			   const char *chip_name) +{ +	struct amdgpu_device *adev = psp->adev; +	char fw_name[PSP_FW_NAME_LEN];  	const struct psp_firmware_header_v1_0 *sos_hdr;  	const struct psp_firmware_header_v1_1 *sos_hdr_v1_1;  	const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; @@ -2505,9 +2574,9 @@ out:  	return err;  } -int parse_ta_bin_descriptor(struct psp_context *psp, -			    const struct ta_fw_bin_desc *desc, -			    const struct ta_firmware_header_v2_0 *ta_hdr) +static int parse_ta_bin_descriptor(struct psp_context *psp, +				   const struct ta_fw_bin_desc *desc, +				   const struct ta_firmware_header_v2_0 *ta_hdr)  {  	uint8_t *ucode_start_addr  = NULL; @@ -2520,10 +2589,11 @@ int parse_ta_bin_descriptor(struct psp_context *psp,  	switch (desc->fw_type) {  	case TA_FW_TYPE_PSP_ASD: -		psp->asd_fw_version 	   = le32_to_cpu(desc->fw_version); +		psp->asd_fw_version	   = le32_to_cpu(desc->fw_version);  		psp->asd_feature_version   = le32_to_cpu(desc->fw_version); -		psp->asd_ucode_size 	   = le32_to_cpu(desc->size_bytes); +		psp->asd_ucode_size	   = le32_to_cpu(desc->size_bytes);  		psp->asd_start_addr 	   = ucode_start_addr; +		psp->asd_fw                = psp->ta_fw;  		break;  	case TA_FW_TYPE_PSP_XGMI:  		psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); @@ -2562,7 +2632,7 @@ int psp_init_ta_microcode(struct psp_context *psp,  			  const char *chip_name)  {  	struct amdgpu_device *adev = psp->adev; -	char fw_name[30]; +	char fw_name[PSP_FW_NAME_LEN];  	const struct ta_firmware_header_v2_0 *ta_hdr;  	int err = 0;  	int ta_index = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 919d2fb7427b..da250bc1ac57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -41,6 +41,7 @@  #define PSP_DTM_SHARED_MEM_SIZE	0x4000  #define PSP_RAP_SHARED_MEM_SIZE	0x4000  #define PSP_SHARED_MEM_SIZE		0x4000 +#define PSP_FW_NAME_LEN		0x24  struct psp_context;  struct psp_xgmi_node_info; @@ -253,6 +254,11 @@ struct psp_context  	uint32_t			asd_ucode_size;  	uint8_t				*asd_start_addr; +	/* toc firmware */ +	const struct firmware		*toc_fw; +	uint32_t			toc_fw_version; +	uint32_t			toc_feature_version; +  	/* fence buffer */  	struct amdgpu_bo		*fence_buf_bo;  	uint64_t			fence_buf_mc_addr; @@ -386,8 +392,12 @@ int psp_ring_cmd_submit(struct psp_context *psp,  			int index);  int psp_init_asd_microcode(struct psp_context *psp,  			   const char *chip_name); +int psp_init_toc_microcode(struct psp_context *psp, +			   const char *chip_name);  int psp_init_sos_microcode(struct psp_context *psp,  			   const char *chip_name);  int psp_init_ta_microcode(struct psp_context *psp,  			  const char *chip_name); +int psp_get_fw_attestation_records_addr(struct psp_context *psp, +					uint64_t *output_ptr);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4e36551ab50b..c136bd449744 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -80,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation {  atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); +static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +				uint64_t addr);  static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,  				uint64_t addr); @@ -516,9 +518,9 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,  /* obj end */  static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, -				  const char* 		invoke_type, -				  const char* 		block_name, -				  enum ta_ras_status 	ret) +					 const char* invoke_type, +					 const char* block_name, +					 enum ta_ras_status ret)  {  	switch (ret) {  	case TA_RAS_STATUS__SUCCESS: @@ -607,7 +609,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,  	if (!con)  		return -EINVAL; -        info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); +	info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);  	if (!info)  		return -ENOMEM; @@ -903,13 +905,6 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,  	return ret;  } -int amdgpu_ras_error_cure(struct amdgpu_device *adev, -		struct ras_cure_if *info) -{ -	/* psp fw has no cure interface for now. */ -	return 0; -} -  /* get the total error counts on all IPs */  unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,  		bool is_ce) @@ -953,7 +948,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)  	case AMDGPU_RAS_RETIRE_PAGE_FAULT:  	default:  		return "F"; -	}; +	}  }  /** @@ -1172,7 +1167,7 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)  			con->dir, &con->disable_ras_err_cnt_harvest);  } -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,  		struct ras_fs_if *head)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1194,7 +1189,6 @@ void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,  void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)  { -#if defined(CONFIG_DEBUG_FS)  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);  	struct ras_manager *obj;  	struct ras_fs_if fs_info; @@ -1203,7 +1197,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)  	 * it won't be called in resume path, no need to check  	 * suspend and gpu reset status  	 */ -	if (!con) +	if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con)  		return;  	amdgpu_ras_debugfs_create_ctrl_node(adev); @@ -1217,10 +1211,9 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)  			amdgpu_ras_debugfs_create(adev, &fs_info);  		}  	} -#endif  } -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,  		struct ras_common_if *head)  {  	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); @@ -1234,7 +1227,6 @@ void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,  static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)  { -#if defined(CONFIG_DEBUG_FS)  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);  	struct ras_manager *obj, *tmp; @@ -1243,7 +1235,6 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)  	}  	con->dir = NULL; -#endif  }  /* debugfs end */ @@ -1291,7 +1282,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)  static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)  { -	amdgpu_ras_debugfs_remove_all(adev); +	if 
(IS_ENABLED(CONFIG_DEBUG_FS)) +		amdgpu_ras_debugfs_remove_all(adev);  	amdgpu_ras_sysfs_remove_all(adev);  	return 0;  } @@ -1477,8 +1469,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)  }  /* Parse RdRspStatus and WrRspStatus */ -void amdgpu_ras_error_status_query(struct amdgpu_device *adev, -		struct ras_query_if *info) +static void amdgpu_ras_error_status_query(struct amdgpu_device *adev, +					  struct ras_query_if *info)  {  	/*  	 * Only two block need to query read/write @@ -1551,10 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,  			.size = AMDGPU_GPU_PAGE_SIZE,  			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,  		}; - -		if (data->last_reserved <= i) +		ret = amdgpu_vram_mgr_query_page_status( +				ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), +				data->bps[i].retired_page); +		if (ret == -EBUSY)  			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; -		else if (data->bps_bo[i] == NULL) +		else if (ret == -ENOENT)  			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;  	} @@ -1606,12 +1600,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,  	unsigned int new_space = old_space + pages;  	unsigned int align_space = ALIGN(new_space, 512);  	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); -	struct amdgpu_bo **bps_bo = -			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); -	if (!bps || !bps_bo) { +	if (!bps) {  		kfree(bps); -		kfree(bps_bo);  		return -ENOMEM;  	} @@ -1620,14 +1611,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,  				data->count * sizeof(*data->bps));  		kfree(data->bps);  	} -	if (data->bps_bo) { -		memcpy(bps_bo, data->bps_bo, -				data->count * sizeof(*data->bps_bo)); -		kfree(data->bps_bo); -	}  	data->bps = bps; -	data->bps_bo = bps_bo;  	data->space_left += align_space - old_space;  	return 0;  } @@ -1639,6 +1624,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);  	struct ras_err_handler_data *data;  	int ret = 0; +	uint32_t i;  	if (!con || !con->eh_data || !bps || pages <= 0)  		return 0; @@ -1648,16 +1634,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,  	if (!data)  		goto out; -	if (data->space_left <= pages) -		if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) { +	for (i = 0; i < pages; i++) { +		if (amdgpu_ras_check_bad_page_unlock(con, +			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) +			continue; + +		if (!data->space_left && +			amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {  			ret = -ENOMEM;  			goto out;  		} -	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); -	data->count += pages; -	data->space_left -= pages; +		amdgpu_vram_mgr_reserve_range( +			ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), +			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, +			AMDGPU_GPU_PAGE_SIZE); +		memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); +		data->count++; +		data->space_left--; +	}  out:  	mutex_unlock(&con->recovery_lock); @@ -1668,7 +1664,7 @@ out:   * write error record array to eeprom, the function should be   * protected by recovery_lock   */ -static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);  	struct ras_err_handler_data *data; @@ -1730,6 +1726,20 @@ out:  	return ret;  } +static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, +				uint64_t addr) +{ +	
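/* caller must hold con->recovery_lock; this helper scans the retired-page list without taking it */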
struct ras_err_handler_data *data = con->eh_data; +	int i; + +	addr >>= AMDGPU_GPU_PAGE_SHIFT; +	for (i = 0; i < data->count; i++) +		if (addr == data->bps[i].retired_page) +			return true; + +	return false; +} +  /*   * check if an address belongs to bad page   * @@ -1739,26 +1749,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,  				uint64_t addr)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); -	struct ras_err_handler_data *data; -	int i;  	bool ret = false;  	if (!con || !con->eh_data)  		return ret;  	mutex_lock(&con->recovery_lock); -	data = con->eh_data; -	if (!data) -		goto out; - -	addr >>= AMDGPU_GPU_PAGE_SHIFT; -	for (i = 0; i < data->count; i++) -		if (addr == data->bps[i].retired_page) { -			ret = true; -			goto out; -		} - -out: +	ret = amdgpu_ras_check_bad_page_unlock(con, addr);  	mutex_unlock(&con->recovery_lock);  	return ret;  } @@ -1804,80 +1801,6 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,  	}  } -/* called in gpu recovery/init */ -int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) -{ -	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); -	struct ras_err_handler_data *data; -	uint64_t bp; -	struct amdgpu_bo *bo = NULL; -	int i, ret = 0; - -	/* Not reserve bad page when amdgpu_bad_page_threshold == 0. */ -	if (!con || !con->eh_data || (amdgpu_bad_page_threshold == 0)) -		return 0; - -	mutex_lock(&con->recovery_lock); -	data = con->eh_data; -	if (!data) -		goto out; -	/* reserve vram at driver post stage. */ -	for (i = data->last_reserved; i < data->count; i++) { -		bp = data->bps[i].retired_page; - -		/* There are two cases of reserve error should be ignored: -		 * 1) a ras bad page has been allocated (used by someone); -		 * 2) a ras bad page has been reserved (duplicate error injection -		 *    for one page); -		 */ -		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, -					       AMDGPU_GPU_PAGE_SIZE, -					       AMDGPU_GEM_DOMAIN_VRAM, -					       &bo, NULL)) -			dev_warn(adev->dev, "RAS WARN: reserve vram for " -					"retired page %llx fail\n", bp); - -		data->bps_bo[i] = bo; -		data->last_reserved = i + 1; -		bo = NULL; -	} - -	/* continue to save bad pages to eeprom even reesrve_vram fails */ -	ret = amdgpu_ras_save_bad_pages(adev); -out: -	mutex_unlock(&con->recovery_lock); -	return ret; -} - -/* called when driver unload */ -static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) -{ -	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); -	struct ras_err_handler_data *data; -	struct amdgpu_bo *bo; -	int i; - -	if (!con || !con->eh_data) -		return 0; - -	mutex_lock(&con->recovery_lock); -	data = con->eh_data; -	if (!data) -		goto out; - -	for (i = data->last_reserved - 1; i >= 0; i--) { -		bo = data->bps_bo[i]; - -		amdgpu_bo_free_kernel(&bo, NULL, NULL); - -		data->bps_bo[i] = bo; -		data->last_reserved = i; -	} -out: -	mutex_unlock(&con->recovery_lock); -	return 0; -} -  int amdgpu_ras_recovery_init(struct amdgpu_device *adev)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1917,18 +1840,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)  		ret = amdgpu_ras_load_bad_pages(adev);  		if (ret)  			goto free; -		ret = amdgpu_ras_reserve_bad_pages(adev); -		if (ret) -			goto release;  	}  	return 0; -release: -	amdgpu_ras_release_bad_pages(adev);  free:  	kfree((*data)->bps); -	kfree((*data)->bps_bo);  	kfree(*data);  	con->eh_data = NULL;  out: @@ -1956,12 +1873,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) 
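Editor's aside on the amdgpu_ras_add_bad_pages() rework above: instead of bulk-copying the incoming records and later pinning one BO per retired page, each record is now checked against the existing list (amdgpu_ras_check_bad_page_unlock) and the page is reserved through the VRAM manager on the spot. A small userspace model of that dedup-then-append loop; the names and sizes here are illustrative, not the driver's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define GPU_PAGE_SHIFT 12

/* toy stand-in for struct ras_err_handler_data; names are hypothetical */
struct bad_page_list {
	uint64_t *pages;	/* retired page frame numbers */
	int count;
	int space_left;
};

static bool page_already_retired(const struct bad_page_list *d, uint64_t addr)
{
	addr >>= GPU_PAGE_SHIFT;	/* same shift the driver applies */
	for (int i = 0; i < d->count; i++)
		if (d->pages[i] == addr)
			return true;
	return false;
}

static int add_bad_pages(struct bad_page_list *d, const uint64_t *addrs, int n)
{
	for (int i = 0; i < n; i++) {
		/* duplicate error injection for one page: skip it */
		if (page_already_retired(d, addrs[i]))
			continue;
		/* grow in chunks of 256 entries, as the patch does */
		if (!d->space_left) {
			uint64_t *p = realloc(d->pages,
					      (d->count + 256) * sizeof(*p));
			if (!p)
				return -1;
			d->pages = p;
			d->space_left = 256;
		}
		/* the driver calls amdgpu_vram_mgr_reserve_range() at this point */
		d->pages[d->count++] = addrs[i] >> GPU_PAGE_SHIFT;
		d->space_left--;
	}
	return 0;
}

int main(void)
{
	struct bad_page_list d = { 0 };
	uint64_t faults[] = { 0x1000, 0x2000, 0x1000 };	/* one duplicate */

	add_bad_pages(&d, faults, 3);
	printf("%d unique pages retired\n", d.count);	/* prints 2 */
	free(d.pages);
	return 0;
}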
 		return 0;  	cancel_work_sync(&con->recovery_work); -	amdgpu_ras_release_bad_pages(adev);  	mutex_lock(&con->recovery_lock);  	con->eh_data = NULL;  	kfree(data->bps); -	kfree(data->bps_bo);  	kfree(data);  	mutex_unlock(&con->recovery_lock); @@ -2156,7 +2071,7 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev,  	amdgpu_ras_sysfs_remove(adev, ras_block);  	if (ih_info->cb) -                amdgpu_ras_interrupt_remove_handler(adev, ih_info); +		amdgpu_ras_interrupt_remove_handler(adev, ih_info);  	amdgpu_ras_feature_enable(adev, ras_block, 0);  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6b8d7bb83bb3..762f5e46c007 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -33,7 +33,6 @@  #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		(0x1 << 0)  #define AMDGPU_RAS_FLAG_INIT_NEED_RESET		(0x1 << 1) -#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV	(0x1 << 2)  enum amdgpu_ras_block {  	AMDGPU_RAS_BLOCK__UMC = 0, @@ -363,14 +362,10 @@ struct ras_err_data {  struct ras_err_handler_data {  	/* point to bad page records array */  	struct eeprom_table_record *bps; -	/* point to reserved bo array */ -	struct amdgpu_bo **bps_bo;  	/* the count of entries */  	int count;  	/* the space can place new entries */  	int space_left; -	/* last reserved entry's index + 1 */ -	int last_reserved;  };  typedef int (*ras_ih_cb)(struct amdgpu_device *adev, @@ -506,22 +501,12 @@ bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);  int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,  		struct eeprom_table_record *bps, int pages); -int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); +int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);  static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)  {  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); -	/* -	 * Save bad page to eeprom before gpu reset, i2c may be unstable -	 * in gpu reset. -	 * -	 * Also, exclude the case when ras recovery issuer is -	 * eeprom page write itself. 
-	 */ -	if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task()) -		amdgpu_ras_reserve_bad_pages(adev); -  	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)  		schedule_work(&ras->recovery_work);  	return 0; @@ -607,14 +592,8 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,  int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,  		struct ras_common_if *head); -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, -		struct ras_fs_if *head); -  void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, -		struct ras_common_if *head); -  int amdgpu_ras_error_query(struct amdgpu_device *adev,  		struct ras_query_if *info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0e64c39a2372..1dd040166c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -27,9 +27,9 @@  #include <linux/bits.h>  #include "atom.h" -#define EEPROM_I2C_TARGET_ADDR_VEGA20    	0xA0 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS  	0xA8 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342  	0xA0 +#define EEPROM_I2C_TARGET_ADDR_VEGA20		0xA0 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS		0xA8 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342	0xA0  /*   * The 2 macros bellow represent the actual size in bytes that @@ -124,11 +124,11 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade  {  	uint32_t *pp = (uint32_t *)buff; -	hdr->header 	      = le32_to_cpu(pp[0]); -	hdr->version 	      = le32_to_cpu(pp[1]); +	hdr->header	      = le32_to_cpu(pp[0]); +	hdr->version	      = le32_to_cpu(pp[1]);  	hdr->first_rec_offset = le32_to_cpu(pp[2]); -	hdr->tbl_size 	      = le32_to_cpu(pp[3]); -	hdr->checksum 	      = le32_to_cpu(pp[4]); +	hdr->tbl_size	      = le32_to_cpu(pp[3]); +	hdr->checksum	      = le32_to_cpu(pp[4]);  }  static int __update_table_header(struct amdgpu_ras_eeprom_control *control, @@ -149,7 +149,11 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control,  	msg.addr = control->i2c_address; +	/* i2c may be unstable in gpu reset */ +	down_read(&adev->reset_sem);  	ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); +	up_read(&adev->reset_sem); +  	if (ret < 1)  		DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); @@ -475,7 +479,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,  	int i, ret = 0;  	struct i2c_msg *msgs, *msg;  	unsigned char *buffs, *buff; -	bool sched_ras_recovery = false;  	struct eeprom_table_record *record;  	struct amdgpu_device *adev = to_amdgpu_device(control);  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -513,7 +516,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,  			"Saved bad pages(%d) reaches threshold value(%d).\n",  			control->num_recs + num, ras->bad_page_cnt_threshold);  		control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD; -		sched_ras_recovery = true;  	}  	/* In case of overflow just start from beginning to not lose newest records */ @@ -557,7 +559,11 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,  		control->next_addr += EEPROM_TABLE_RECORD_SIZE;  	} +	/* i2c may be unstable in gpu reset */ +	down_read(&adev->reset_sem);  	ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num); +	up_read(&adev->reset_sem); +  	if (ret < 1) {  		DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); @@ -595,20 +601,6 @@ int 
amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,  		__update_tbl_checksum(control, records, num, old_hdr_byte_sum);  		__update_table_header(control, buffs); - -		if (sched_ras_recovery) { -			/* -			 * Before scheduling ras recovery, assert the related -			 * flag first, which shall bypass common bad page -			 * reservation execution in amdgpu_ras_reset_gpu. -			 */ -			amdgpu_ras_get_context(adev)->flags |= -				AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV; - -			dev_warn(adev->dev, "Conduct ras recovery due to bad " -				"page threshold reached.\n"); -			amdgpu_ras_reset_gpu(adev); -		}  	} else if (!__validate_tbl_checksum(control, records, num)) {  		DRM_WARN("EEPROM Table checksum mismatch!");  		/* TODO Uncomment when EEPROM read/write is relliable */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 15ee13c3bd9e..1a612f51ecd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -52,7 +52,6 @@  /**   * amdgpu_ring_alloc - allocate space on the ring buffer   * - * @adev: amdgpu_device pointer   * @ring: amdgpu_ring structure holding ring information   * @ndw: number of dwords to allocate in the ring buffer   * @@ -95,7 +94,8 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)  		amdgpu_ring_write(ring, ring->funcs->nop);  } -/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets +/** + * amdgpu_ring_generic_pad_ib - pad IB with NOP packets   *   * @ring: amdgpu_ring structure holding ring information   * @ib: IB to add NOP packets to @@ -112,7 +112,6 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)   * amdgpu_ring_commit - tell the GPU to execute the new   * commands on the ring buffer   * - * @adev: amdgpu_device pointer   * @ring: amdgpu_ring structure holding ring information   *   * Update the wptr (write pointer) to tell the GPU to @@ -155,8 +154,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)   *   * @adev: amdgpu_device pointer   * @ring: amdgpu_ring structure holding ring information - * @max_ndw: maximum number of dw for ring alloc - * @nop: nop packet for this ring + * @max_dw: maximum number of dw for ring alloc + * @irq_src: interrupt source to use for this ring + * @irq_type: interrupt type to use for this ring + * @hw_prio: ring priority (NORMAL/HIGH)   *   * Initialize the driver information for the selected ring (all asics).   * Returns 0 on success, error on failure. @@ -276,7 +277,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,  /**   * amdgpu_ring_fini - tear down the driver ring struct.   * - * @adev: amdgpu_device pointer   * @ring: amdgpu_ring structure holding ring information   *   * Tear down the driver information for the selected ring (all asics). 
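One pattern from the amdgpu_ras_eeprom.c hunks above deserves a note: both i2c_transfer() calls are now bracketed by down_read(&adev->reset_sem)/up_read(), so EEPROM traffic can never overlap a GPU reset, which holds the same semaphore for writing. A rough userspace model of that reader/writer shape, using a pthread rwlock (the stub names are made up):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t reset_sem = PTHREAD_RWLOCK_INITIALIZER;

/* stand-in for i2c_transfer(); returns the number of messages handled */
static int i2c_transfer_stub(int num)
{
	return num;
}

static int eeprom_update(int num)
{
	int ret;

	/* reader side: EEPROM I/O may proceed, but never overlaps a reset */
	pthread_rwlock_rdlock(&reset_sem);
	ret = i2c_transfer_stub(num);
	pthread_rwlock_unlock(&reset_sem);

	return ret < 1 ? -1 : 0;
}

static void gpu_reset(void)
{
	/* writer side: waits for in-flight transfers, then excludes new ones */
	pthread_rwlock_wrlock(&reset_sem);
	/* ... reset the ASIC; i2c would be unstable in this window ... */
	pthread_rwlock_unlock(&reset_sem);
}

int main(void)
{
	if (eeprom_update(1) == 0)
		puts("EEPROM update completed outside of a reset");
	gpu_reset();
	return 0;
}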
@@ -310,7 +310,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)  /**   * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper   * - * @adev: amdgpu_device pointer + * @ring: ring to write to   * @reg0: register to write   * @reg1: register to wait on   * @ref: reference value to write/wait on @@ -396,7 +396,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,  			return result;  		value = ring->ring[(*pos - 12)/4]; -		r = put_user(value, (uint32_t*)buf); +		r = put_user(value, (uint32_t *)buf);  		if (r)  			return r;  		buf += 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 0bd1d4ffc19e..524d10b21041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -75,7 +75,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,  }  void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, -                              struct amdgpu_sa_manager *sa_manager) +			       struct amdgpu_sa_manager *sa_manager)  {  	struct amdgpu_sa_bo *sa_bo, *tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0da0a0d98672..b7d861ed5284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,7 +29,7 @@  #include <drm/amdgpu_drm.h>  #include "amdgpu.h" - +#include "amdgpu_sched.h"  #include "amdgpu_vm.h"  int amdgpu_to_sched_priority(int amdgpu_priority, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 250a309e4dee..de91d29c9d96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -126,7 +126,7 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,  		goto free;  	} -        return 0; +	return 0;  late_fini:  	amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h new file mode 100644 index 000000000000..03009157aec8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -0,0 +1,37 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_SMUIO_H__ +#define __AMDGPU_SMUIO_H__ + +struct amdgpu_smuio_funcs { +	u32 (*get_rom_index_offset)(struct amdgpu_device *adev); +	u32 (*get_rom_data_offset)(struct amdgpu_device *adev); +	void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); +	void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); +}; + +struct amdgpu_smuio { +	const struct amdgpu_smuio_funcs		*funcs; +}; + +#endif /* __AMDGPU_SMUIO_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 8ea6c49529e7..4e558632a5d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -78,7 +78,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,  /**   * amdgpu_sync_get_owner - extract the owner of a fence   * - * @fence: fence get the owner from + * @f: fence get the owner from   *   * Extract who originally created the fence.   */ @@ -172,7 +172,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)  /**   * amdgpu_sync_vm_fence - remember to sync to this VM fence   * - * @adev: amdgpu device   * @sync: sync object to add fence to   * @fence: the VM fence to add   * @@ -190,6 +189,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)  /**   * amdgpu_sync_resv - sync to a reservation object   * + * @adev: amdgpu device   * @sync: sync object to add fences from reservation object to   * @resv: reservation object with embedded fence   * @mode: how owner affects which fences we sync to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2f4d5ca9894f..7b230bcbf2c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -42,16 +42,11 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)  	size = 1024 * 1024;  	/* Number of tests = -	 * (Total GTT - IB pool - writeback page - ring buffers) / test size +	 * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size  	 */ -	n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; -	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) -		if (adev->rings[i]) -			n -= adev->rings[i]->ring_size; -	if (adev->wb.wb_obj) -		n -= AMDGPU_GPU_PAGE_SIZE; -	if (adev->irq.ih.ring_obj) -		n -= adev->irq.ih.ring_size; +	n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size); +	n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS * +		AMDGPU_GPU_PAGE_SIZE;  	n /= size;  	gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL); @@ -157,10 +152,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)  					  i, *vram_start, gart_start,  					  (unsigned long long)  					  (gart_addr - adev->gmc.gart_start + -					   (void*)gart_start - gtt_map), +					   (void *)gart_start - gtt_map),  					  (unsigned long long)  					  (vram_addr - adev->gmc.vram_start + -					   (void*)gart_start - gtt_map)); +					   (void *)gart_start - gtt_map));  				amdgpu_bo_kunmap(vram_obj);  				goto out_lclean_unpin;  			} @@ -203,10 +198,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)  					  i, *gart_start, vram_start,  					  (unsigned long long)  					  (vram_addr - adev->gmc.vram_start + -					   (void*)vram_start - vram_map), +					   (void *)vram_start - vram_map),  					  (unsigned long long)  					  (gart_addr - adev->gmc.gart_start + -					   (void*)vram_start - vram_map)); +					   (void *)vram_start - vram_map));  				amdgpu_bo_kunmap(gtt_obj[i]);  				
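The new test-count formula in amdgpu_do_test_moves() above is easy to sanity-check by hand: free GTT is what remains after subtracting the already-pinned GTT and the transfer windows the buffer-move path reserves for itself. Worked through with invented GTT and pin sizes (the window constants follow the driver's defines, which at the time were 512 pages and 2 windows):

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_GPU_PAGE_SIZE		4096ULL
#define AMDGPU_GTT_MAX_TRANSFER_SIZE	512ULL	/* pages per transfer window */
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2ULL

int main(void)
{
	uint64_t gart_size     = 256ULL << 20;	/* assume 256 MiB of GTT */
	uint64_t gart_pin_size = 16ULL << 20;	/* assume 16 MiB already pinned */
	uint64_t test_size     = 1024 * 1024;	/* 1 MiB per test BO, as in the code */
	uint64_t n;

	n  = gart_size - gart_pin_size;
	n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS *
	     AMDGPU_GPU_PAGE_SIZE;
	n /= test_size;

	/* 256 MiB - 16 MiB - 4 MiB = 236 MiB -> 236 test buffers */
	printf("number of move tests: %llu\n", (unsigned long long)n);
	return 0;
}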
goto out_lclean_unpin;  			} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index ee9480d14cbc..6752d8b13118 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -358,23 +358,24 @@ TRACE_EVENT(amdgpu_vm_update_ptes,  			}  	),  	TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx," -		  " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid, +		  " flags:0x%llx, incr:%llu, dst:\n%s%s", __entry->pid,  		  __entry->vm_ctx, __entry->start, __entry->end,  		  __entry->flags, __entry->incr,  __print_array( -		  __get_dynamic_array(dst), __entry->nptes, 8)) +		  __get_dynamic_array(dst), min(__entry->nptes, 32u), 8), +		  __entry->nptes > 32 ? "..." : "")  );  TRACE_EVENT(amdgpu_vm_set_ptes,  	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, -		     uint32_t incr, uint64_t flags, bool direct), -	    TP_ARGS(pe, addr, count, incr, flags, direct), +		     uint32_t incr, uint64_t flags, bool immediate), +	    TP_ARGS(pe, addr, count, incr, flags, immediate),  	    TP_STRUCT__entry(  			     __field(u64, pe)  			     __field(u64, addr)  			     __field(u32, count)  			     __field(u32, incr)  			     __field(u64, flags) -			     __field(bool, direct) +			     __field(bool, immediate)  			     ),  	    TP_fast_assign( @@ -383,32 +384,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes,  			   __entry->count = count;  			   __entry->incr = incr;  			   __entry->flags = flags; -			   __entry->direct = direct; +			   __entry->immediate = immediate;  			   ),  	    TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " -		      "direct=%d", __entry->pe, __entry->addr, __entry->incr, -		      __entry->flags, __entry->count, __entry->direct) +		      "immediate=%d", __entry->pe, __entry->addr, __entry->incr, +		      __entry->flags, __entry->count, __entry->immediate)  );  TRACE_EVENT(amdgpu_vm_copy_ptes, -	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), -	    TP_ARGS(pe, src, count, direct), +	    TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate), +	    TP_ARGS(pe, src, count, immediate),  	    TP_STRUCT__entry(  			     __field(u64, pe)  			     __field(u64, src)  			     __field(u32, count) -			     __field(bool, direct) +			     __field(bool, immediate)  			     ),  	    TP_fast_assign(  			   __entry->pe = pe;  			   __entry->src = src;  			   __entry->count = count; -			   __entry->direct = direct; +			   __entry->immediate = immediate;  			   ), -	    TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", +	    TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d",  		      __entry->pe, __entry->src, __entry->count, -		      __entry->direct) +		      __entry->immediate)  );  TRACE_EVENT(amdgpu_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8039d2399584..4d8f19ab1014 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -47,7 +47,6 @@  #include <drm/ttm/ttm_bo_driver.h>  #include <drm/ttm/ttm_placement.h>  #include <drm/ttm/ttm_module.h> -#include <drm/ttm/ttm_page_alloc.h>  #include <drm/drm_debugfs.h>  #include <drm/amdgpu_drm.h> @@ -66,13 +65,15 @@  static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,  				   struct ttm_tt *ttm,  				   struct ttm_resource *bo_mem); +static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev, +				      struct ttm_tt *ttm);  static int amdgpu_ttm_init_on_chip(struct amdgpu_device 
*adev,  				    unsigned int type, -				    uint64_t size) +				    uint64_t size_in_page)  {  	return ttm_range_man_init(&adev->mman.bdev, type, -				  false, size >> PAGE_SHIFT); +				  false, size_in_page);  }  /** @@ -92,7 +93,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  		.fpfn = 0,  		.lpfn = 0,  		.mem_type = TTM_PL_SYSTEM, -		.flags = TTM_PL_MASK_CACHING +		.flags = 0  	};  	/* Don't handle scatter gather BOs */ @@ -292,11 +293,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,  	cpu_addr = &job->ibs[0].ptr[num_dw];  	if (mem->mem_type == TTM_PL_TT) { -		struct ttm_dma_tt *dma;  		dma_addr_t *dma_address; -		dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); -		dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; +		dma_address = &bo->ttm->dma_address[offset >> PAGE_SHIFT];  		r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags,  				    cpu_addr);  		if (r) @@ -452,7 +451,7 @@ error:  	return r;  } -/** +/*   * amdgpu_move_blit - Copy an entire buffer to another buffer   *   * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to @@ -513,116 +512,7 @@ error:  	return r;  } -/** - * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer - * - * Called by amdgpu_bo_move(). - */ -static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, -				struct ttm_operation_ctx *ctx, -				struct ttm_resource *new_mem) -{ -	struct ttm_resource *old_mem = &bo->mem; -	struct ttm_resource tmp_mem; -	struct ttm_place placements; -	struct ttm_placement placement; -	int r; - -	/* create space/pages for new_mem in GTT space */ -	tmp_mem = *new_mem; -	tmp_mem.mm_node = NULL; -	placement.num_placement = 1; -	placement.placement = &placements; -	placement.num_busy_placement = 1; -	placement.busy_placement = &placements; -	placements.fpfn = 0; -	placements.lpfn = 0; -	placements.mem_type = TTM_PL_TT; -	placements.flags = TTM_PL_MASK_CACHING; -	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); -	if (unlikely(r)) { -		pr_err("Failed to find GTT space for blit from VRAM\n"); -		return r; -	} - -	/* set caching flags */ -	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); -	if (unlikely(r)) { -		goto out_cleanup; -	} - -	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx); -	if (unlikely(r)) -		goto out_cleanup; - -	/* Bind the memory to the GTT space */ -	r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, &tmp_mem); -	if (unlikely(r)) { -		goto out_cleanup; -	} - -	/* blit VRAM to GTT */ -	r = amdgpu_move_blit(bo, evict, &tmp_mem, old_mem); -	if (unlikely(r)) { -		goto out_cleanup; -	} - -	/* move BO (in tmp_mem) to new_mem */ -	r = ttm_bo_move_ttm(bo, ctx, new_mem); -out_cleanup: -	ttm_resource_free(bo, &tmp_mem); -	return r; -} - -/** - * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM - * - * Called by amdgpu_bo_move(). 
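Note what replaces the two deleted bounce-copy helpers (amdgpu_move_vram_ram and amdgpu_move_ram_vram): rather than driving the intermediate GTT copy itself, amdgpu_bo_move() in the hunk below fills in the new hop placement and returns -EMULTIHOP, and the TTM core re-issues the move in two legs. A toy model of that handshake (real TTM keeps retrying until the driver stops asking for hops; the types here are simplified stand-ins):

#include <stdio.h>

#define EMULTIHOP 72	/* same errno value the kernel uses */

enum mem_type { PL_SYSTEM, PL_TT, PL_VRAM };

static const char * const names[] = { "SYSTEM", "TT", "VRAM" };

struct place { enum mem_type mem_type; };

/* driver side: either performs the move or asks for an intermediate hop */
static int driver_move(enum mem_type from, enum mem_type to, struct place *hop)
{
	if ((from == PL_SYSTEM && to == PL_VRAM) ||
	    (from == PL_VRAM && to == PL_SYSTEM)) {
		hop->mem_type = PL_TT;	/* bounce through GTT */
		return -EMULTIHOP;
	}
	printf("  moved %s -> %s\n", names[from], names[to]);
	return 0;
}

/* core side: restart the move through the placement the driver handed back */
static int handle_move(enum mem_type from, enum mem_type to)
{
	struct place hop = { PL_TT };
	int r = driver_move(from, to, &hop);

	if (r == -EMULTIHOP) {
		r = driver_move(from, hop.mem_type, &hop);	/* into the hop */
		if (r)
			return r;
		r = driver_move(hop.mem_type, to, &hop);	/* then onward */
	}
	return r;
}

int main(void)
{
	handle_move(PL_SYSTEM, PL_VRAM);	/* hops via TT */
	handle_move(PL_VRAM, PL_SYSTEM);	/* hops via TT */
	return 0;
}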
- */ -static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, -				struct ttm_operation_ctx *ctx, -				struct ttm_resource *new_mem) -{ -	struct ttm_resource *old_mem = &bo->mem; -	struct ttm_resource tmp_mem; -	struct ttm_placement placement; -	struct ttm_place placements; -	int r; - -	/* make space in GTT for old_mem buffer */ -	tmp_mem = *new_mem; -	tmp_mem.mm_node = NULL; -	placement.num_placement = 1; -	placement.placement = &placements; -	placement.num_busy_placement = 1; -	placement.busy_placement = &placements; -	placements.fpfn = 0; -	placements.lpfn = 0; -	placements.mem_type = TTM_PL_TT; -	placements.flags = TTM_PL_MASK_CACHING; -	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); -	if (unlikely(r)) { -		pr_err("Failed to find GTT space for blit to VRAM\n"); -		return r; -	} - -	/* move/bind old memory to GTT space */ -	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); -	if (unlikely(r)) { -		goto out_cleanup; -	} - -	/* copy to VRAM */ -	r = amdgpu_move_blit(bo, evict, new_mem, old_mem); -	if (unlikely(r)) { -		goto out_cleanup; -	} -out_cleanup: -	ttm_resource_free(bo, &tmp_mem); -	return r; -} - -/** +/*   * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy   *   * Called by amdgpu_bo_move() @@ -646,39 +536,55 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev,  		<= adev->gmc.visible_vram_size;  } -/** +/*   * amdgpu_bo_move - Move a buffer object to a new memory location   *   * Called by ttm_bo_handle_move_mem()   */  static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,  			  struct ttm_operation_ctx *ctx, -			  struct ttm_resource *new_mem) +			  struct ttm_resource *new_mem, +			  struct ttm_place *hop)  {  	struct amdgpu_device *adev;  	struct amdgpu_bo *abo;  	struct ttm_resource *old_mem = &bo->mem;  	int r; +	if (new_mem->mem_type == TTM_PL_TT) { +		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem); +		if (r) +			return r; +	} +  	/* Can't move a pinned BO */  	abo = ttm_to_amdgpu_bo(bo); -	if (WARN_ON_ONCE(abo->pin_count > 0)) +	if (WARN_ON_ONCE(abo->tbo.pin_count > 0))  		return -EINVAL;  	adev = amdgpu_ttm_adev(bo->bdev);  	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {  		ttm_bo_move_null(bo, new_mem); -		return 0; +		goto out;  	} -	if ((old_mem->mem_type == TTM_PL_TT && -	     new_mem->mem_type == TTM_PL_SYSTEM) || -	    (old_mem->mem_type == TTM_PL_SYSTEM && -	     new_mem->mem_type == TTM_PL_TT)) { -		/* bind is enough */ +	if (old_mem->mem_type == TTM_PL_SYSTEM && +	    new_mem->mem_type == TTM_PL_TT) {  		ttm_bo_move_null(bo, new_mem); -		return 0; +		goto out; +	} +	if (old_mem->mem_type == TTM_PL_TT && +	    new_mem->mem_type == TTM_PL_SYSTEM) { +		r = ttm_bo_wait_ctx(bo, ctx); +		if (r) +			return r; + +		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); +		ttm_resource_free(bo, &bo->mem); +		ttm_bo_assign_mem(bo, new_mem); +		goto out;  	} +  	if (old_mem->mem_type == AMDGPU_PL_GDS ||  	    old_mem->mem_type == AMDGPU_PL_GWS ||  	    old_mem->mem_type == AMDGPU_PL_OA || @@ -687,27 +593,27 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,  	    new_mem->mem_type == AMDGPU_PL_OA) {  		/* Nothing to save here */  		ttm_bo_move_null(bo, new_mem); -		return 0; +		goto out;  	} -	if (!adev->mman.buffer_funcs_enabled) { -		r = -ENODEV; -		goto memcpy; -	} +	if (adev->mman.buffer_funcs_enabled) { +		if (((old_mem->mem_type == TTM_PL_SYSTEM && +		      new_mem->mem_type == TTM_PL_VRAM) || +		     (old_mem->mem_type == TTM_PL_VRAM && +		      new_mem->mem_type == 
TTM_PL_SYSTEM))) { +			hop->fpfn = 0; +			hop->lpfn = 0; +			hop->mem_type = TTM_PL_TT; +			hop->flags = 0; +			return -EMULTIHOP; +		} -	if (old_mem->mem_type == TTM_PL_VRAM && -	    new_mem->mem_type == TTM_PL_SYSTEM) { -		r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); -	} else if (old_mem->mem_type == TTM_PL_SYSTEM && -		   new_mem->mem_type == TTM_PL_VRAM) { -		r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); +		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);  	} else { -		r = amdgpu_move_blit(bo, evict, -				     new_mem, old_mem); +		r = -ENODEV;  	}  	if (r) { -memcpy:  		/* Check that all memory is CPU accessible */  		if (!amdgpu_mem_visible(adev, old_mem) ||  		    !amdgpu_mem_visible(adev, new_mem)) { @@ -729,12 +635,14 @@ memcpy:  		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;  	} +out:  	/* update statistics */  	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); +	amdgpu_bo_move_notify(bo, evict, new_mem);  	return 0;  } -/** +/*   * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault   *   * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() @@ -767,6 +675,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso  		mem->bus.offset += adev->gmc.aper_base;  		mem->bus.is_iomem = true; +		mem->bus.caching = ttm_write_combined;  		break;  	default:  		return -EINVAL; @@ -811,7 +720,7 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)   * TTM backend functions.   */  struct amdgpu_ttm_tt { -	struct ttm_dma_tt	ttm; +	struct ttm_tt	ttm;  	struct drm_gem_object	*gobj;  	u64			offset;  	uint64_t		userptr; @@ -824,7 +733,7 @@ struct amdgpu_ttm_tt {  };  #ifdef CONFIG_DRM_AMDGPU_USERPTR -/** +/*   * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user   * memory and start HMM tracking CPU page table update   * @@ -929,7 +838,7 @@ out:  	return r;  } -/** +/*   * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change   * Check if the pages backing this ttm range have been invalidated   * @@ -943,7 +852,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)  	if (!gtt || !gtt->userptr)  		return false; -	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", +	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",  		gtt->userptr, ttm->num_pages);  	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, @@ -965,7 +874,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)  }  #endif -/** +/*   * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.   *   * Called by amdgpu_cs_list_validate(). This creates the page list @@ -980,8 +889,8 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)  		ttm->pages[i] = pages ? 
pages[i] : NULL;  } -/** - * amdgpu_ttm_tt_pin_userptr - 	prepare the sg table with the user pages +/* + * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages   *   * Called by amdgpu_ttm_backend_bind()   **/ @@ -1020,7 +929,7 @@ release_sg:  	return r;  } -/** +/*   * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages   */  static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev, @@ -1095,13 +1004,13 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,  gart_bind_fail:  	if (r) -		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", +		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",  			  ttm->num_pages, gtt->offset);  	return r;  } -/** +/*   * amdgpu_ttm_backend_bind - Bind GTT memory   *   * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). @@ -1130,7 +1039,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,  		}  	}  	if (!ttm->num_pages) { -		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", +		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",  		     ttm->num_pages, bo_mem, ttm);  	} @@ -1153,13 +1062,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev,  		ttm->pages, gtt->ttm.dma_address, flags);  	if (r) -		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", +		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",  			  ttm->num_pages, gtt->offset);  	gtt->bound = true;  	return r;  } -/** +/*   * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either   * through AGP or GART aperture.   * @@ -1171,7 +1080,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);  	struct ttm_operation_ctx ctx = { false, false }; -	struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; +	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;  	struct ttm_resource tmp;  	struct ttm_placement placement;  	struct ttm_place placements; @@ -1220,7 +1129,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)  	return 0;  } -/** +/*   * amdgpu_ttm_recover_gart - Rebind GTT pages   *   * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to @@ -1241,7 +1150,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)  	return r;  } -/** +/*   * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages   *   * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and @@ -1267,8 +1176,8 @@ static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev,  	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */  	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);  	if (r) -		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", -			  gtt->ttm.ttm.num_pages, gtt->offset); +		DRM_ERROR("failed to unbind %u pages at 0x%08llX\n", +			  gtt->ttm.num_pages, gtt->offset);  	gtt->bound = false;  } @@ -1282,7 +1191,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,  	if (gtt->usertask)  		put_task_struct(gtt->usertask); -	ttm_dma_tt_fini(>t->ttm); +	ttm_tt_fini(>t->ttm);  	kfree(gtt);  } @@ -1290,13 +1199,16 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev,   * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO   *   * @bo: The buffer object to create a GTT ttm_tt object around + * @page_flags: Page flags to be added to the ttm_tt object   *   * Called by ttm_tt_create().   
*/  static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,  					   uint32_t page_flags)  { +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);  	struct amdgpu_ttm_tt *gtt; +	enum ttm_caching caching;  	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);  	if (gtt == NULL) { @@ -1304,15 +1216,20 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,  	}  	gtt->gobj = &bo->base; +	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) +		caching = ttm_write_combined; +	else +		caching = ttm_cached; +  	/* allocate space for the uninitialized page entries */ -	if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { +	if (ttm_sg_tt_init(>t->ttm, bo, page_flags, caching)) {  		kfree(gtt);  		return NULL;  	} -	return >t->ttm.ttm; +	return >t->ttm;  } -/** +/*   * amdgpu_ttm_tt_populate - Map GTT pages visible to the device   *   * Map the pages of a ttm_tt object to an address space visible @@ -1332,7 +1249,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev,  			return -ENOMEM;  		ttm->page_flags |= TTM_PAGE_FLAG_SG; -		ttm_tt_set_populated(ttm);  		return 0;  	} @@ -1352,28 +1268,20 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev,  		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,  						 gtt->ttm.dma_address,  						 ttm->num_pages); -		ttm_tt_set_populated(ttm);  		return 0;  	} -#ifdef CONFIG_SWIOTLB -	if (adev->need_swiotlb && swiotlb_nr_tbl()) { -		return ttm_dma_populate(>t->ttm, adev->dev, ctx); -	} -#endif - -	/* fall back to generic helper to populate the page array -	 * and map them to the device */ -	return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); +	return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);  } -/** +/*   * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays   *   * Unmaps pages of a ttm_tt object from the device address space and   * unpopulates the page array backing it.   */ -static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm) +static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, +				     struct ttm_tt *ttm)  {  	struct amdgpu_ttm_tt *gtt = (void *)ttm;  	struct amdgpu_device *adev; @@ -1398,16 +1306,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *  		return;  	adev = amdgpu_ttm_adev(bdev); - -#ifdef CONFIG_SWIOTLB -	if (adev->need_swiotlb && swiotlb_nr_tbl()) { -		ttm_dma_unpopulate(>t->ttm, adev->dev); -		return; -	} -#endif - -	/* fall back to generic helper to unmap and unpopulate array */ -	ttm_unmap_and_unpopulate_pages(adev->dev, >t->ttm); +	return ttm_pool_free(&adev->mman.bdev.pool, ttm);  }  /** @@ -1433,7 +1332,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,  			return -ENOMEM;  	} -	gtt = (void*)bo->ttm; +	gtt = (void *)bo->ttm;  	gtt->userptr = addr;  	gtt->userflags = flags; @@ -1445,7 +1344,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,  	return 0;  } -/** +/*   * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object   */  struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) @@ -1461,7 +1360,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)  	return gtt->usertask->mm;  } -/** +/*   * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an   * address range for the current task.   
* @@ -1478,14 +1377,14 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,  	/* Return false if no part of the ttm_tt object lies within  	 * the range  	 */ -	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; +	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;  	if (gtt->userptr > end || gtt->userptr + size <= start)  		return false;  	return true;  } -/** +/*   * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?   */  bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) @@ -1498,7 +1397,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)  	return true;  } -/** +/*   * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?   */  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) @@ -1529,7 +1428,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)  	if (mem && mem->mem_type == TTM_PL_TT) {  		flags |= AMDGPU_PTE_SYSTEM; -		if (ttm->caching_state == tt_cached) +		if (ttm->caching == ttm_cached)  			flags |= AMDGPU_PTE_SNOOPED;  	} @@ -1539,9 +1438,10 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)  /**   * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object   * + * @adev: amdgpu_device pointer   * @ttm: The ttm_tt object to compute the flags for   * @mem: The memory registry backing this ttm_tt object - + *   * Figure out the flags to use for a VM PTE (Page Table Entry).   */  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, @@ -1558,7 +1458,7 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,  	return flags;  } -/** +/*   * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer   * object.   * @@ -1699,20 +1599,23 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,  	return ret;  } +static void +amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) +{ +	amdgpu_bo_move_notify(bo, false, NULL); +} +  static struct ttm_bo_driver amdgpu_bo_driver = {  	.ttm_tt_create = &amdgpu_ttm_tt_create,  	.ttm_tt_populate = &amdgpu_ttm_tt_populate,  	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, -	.ttm_tt_bind = &amdgpu_ttm_backend_bind, -	.ttm_tt_unbind = &amdgpu_ttm_backend_unbind,  	.ttm_tt_destroy = &amdgpu_ttm_backend_destroy,  	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,  	.evict_flags = &amdgpu_evict_flags,  	.move = &amdgpu_bo_move,  	.verify_access = &amdgpu_verify_access, -	.move_notify = &amdgpu_bo_move_notify, +	.delete_mem_notify = &amdgpu_bo_delete_mem_notify,  	.release_notify = &amdgpu_bo_release_notify, -	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,  	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,  	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,  	.access_memory = &amdgpu_ttm_access_memory, @@ -1866,7 +1769,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)  	return 0;  } -/** +/*   * amdgpu_ttm_init - Init the memory management (ttm) as well as various   * gtt/vram related fields.   
* @@ -1884,10 +1787,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	mutex_init(&adev->mman.gtt_window_lock);  	/* No others user of address space so set it to 0 */ -	r = ttm_bo_device_init(&adev->mman.bdev, -			       &amdgpu_bo_driver, +	r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,  			       adev_to_drm(adev)->anon_inode->i_mapping,  			       adev_to_drm(adev)->vma_offset_manager, +			       adev->need_swiotlb,  			       dma_addressing_limited(adev->dev));  	if (r) {  		DRM_ERROR("failed initializing buffer object driver(%d).\n", r); @@ -1895,9 +1798,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	}  	adev->mman.initialized = true; -	/* We opt to avoid OOM on system pages allocations */ -	adev->mman.bdev.no_retry = true; -  	/* Initialize VRAM pool with all of VRAM divided into pages */  	r = amdgpu_vram_mgr_init(adev);  	if (r) { @@ -2003,18 +1903,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	return 0;  } -/** - * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm - */ -void amdgpu_ttm_late_init(struct amdgpu_device *adev) -{ -	/* return the VGA stolen memory (if any) back to VRAM */ -	if (!adev->mman.keep_stolen_vga_memory) -		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); -	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); -} - -/** +/*   * amdgpu_ttm_fini - De-initialize the TTM memory pools   */  void amdgpu_ttm_fini(struct amdgpu_device *adev) @@ -2024,8 +1913,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)  	amdgpu_ttm_training_reserve_vram_fini(adev);  	/* return the stolen vga memory back to VRAM */ -	if (adev->mman.keep_stolen_vga_memory) -		amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); +	amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); +	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);  	/* return the IP Discovery TMR memory back to VRAM */  	amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);  	amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -2092,15 +1981,48 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)  	adev->mman.buffer_funcs_enabled = enable;  } +static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf) +{ +	struct ttm_buffer_object *bo = vmf->vma->vm_private_data; +	vm_fault_t ret; + +	ret = ttm_bo_vm_reserve(bo, vmf); +	if (ret) +		return ret; + +	ret = amdgpu_bo_fault_reserve_notify(bo); +	if (ret) +		goto unlock; + +	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, +				       TTM_BO_VM_NUM_PREFAULT, 1); +	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) +		return ret; + +unlock: +	dma_resv_unlock(bo->base.resv); +	return ret; +} + +static struct vm_operations_struct amdgpu_ttm_vm_ops = { +	.fault = amdgpu_ttm_fault, +	.open = ttm_bo_vm_open, +	.close = ttm_bo_vm_close, +	.access = ttm_bo_vm_access +}; +  int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)  {  	struct drm_file *file_priv = filp->private_data;  	struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev); +	int r; -	if (adev == NULL) -		return -EINVAL; +	r = ttm_bo_mmap(filp, vma, &adev->mman.bdev); +	if (unlikely(r != 0)) +		return r; -	return ttm_bo_mmap(filp, vma, &adev->mman.bdev); +	vma->vm_ops = &amdgpu_ttm_vm_ops; +	return 0;  }  int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, @@ -2284,19 +2206,25 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)  	return 0;  } +static int 
amdgpu_ttm_pool_debugfs(struct seq_file *m, void *data) +{ +	struct drm_info_node *node = (struct drm_info_node *)m->private; +	struct drm_device *dev = node->minor->dev; +	struct amdgpu_device *adev = drm_to_adev(dev); + +	return ttm_pool_debugfs(&adev->mman.bdev.pool, m); +} +  static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {  	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},  	{"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},  	{"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},  	{"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},  	{"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA}, -	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, -#ifdef CONFIG_SWIOTLB -	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} -#endif +	{"ttm_page_pool", amdgpu_ttm_pool_debugfs, 0, NULL},  }; -/** +/*   * amdgpu_ttm_vram_read - Linear read access to VRAM   *   * Accesses VRAM via MMIO for debugging purposes. @@ -2331,7 +2259,7 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,  	return result;  } -/** +/*   * amdgpu_ttm_vram_write - Linear write access to VRAM   *   * Accesses VRAM via MMIO for debugging purposes. @@ -2384,7 +2312,7 @@ static const struct file_operations amdgpu_ttm_vram_fops = {  #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS -/** +/*   * amdgpu_ttm_gtt_read - Linear read access to GTT memory   */  static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, @@ -2434,7 +2362,7 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {  #endif -/** +/*   * amdgpu_iomem_read - Virtual read access to GPU mapped memory   *   * This function is used to read memory that has been mapped to the @@ -2490,7 +2418,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,  	return result;  } -/** +/*   * amdgpu_iomem_write - Virtual write access to GPU mapped memory   *   * This function is used to write memory that has been mapped to the @@ -2586,12 +2514,6 @@ int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)  	}  	count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); - -#ifdef CONFIG_SWIOTLB -	if (!(adev->need_swiotlb && swiotlb_nr_tbl())) -		--count; -#endif -  	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);  #else  	return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index a87951b2f06d..d2987536d7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -37,10 +37,17 @@  #define AMDGPU_POISON	0xd0bed0be +struct amdgpu_vram_reservation { +	struct list_head node; +	struct drm_mm_node mm_node; +}; +  struct amdgpu_vram_mgr {  	struct ttm_resource_manager manager;  	struct drm_mm mm;  	spinlock_t lock; +	struct list_head reservations_pending; +	struct list_head reserved_pages;  	atomic64_t usage;  	atomic64_t vis_usage;  }; @@ -54,7 +61,6 @@ struct amdgpu_gtt_mgr {  struct amdgpu_mman {  	struct ttm_bo_device		bdev; -	bool				mem_global_referenced;  	bool				initialized;  	void __iomem			*aper_base_kaddr; @@ -119,9 +125,12 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,  			      struct sg_table *sgt);  uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);  uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man); +int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, +				  uint64_t start, uint64_t size); +int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, +				      
uint64_t start);  int amdgpu_ttm_init(struct amdgpu_device *adev); -void amdgpu_ttm_late_init(struct amdgpu_device *adev);  void amdgpu_ttm_fini(struct amdgpu_device *adev);  void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,  					bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b313ce4c3e97..1beb08af347f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -68,23 +68,32 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr)  {  	uint16_t version_major = le16_to_cpu(hdr->header_version_major);  	uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); +	const struct smc_firmware_header_v1_0 *v1_0_hdr; +	const struct smc_firmware_header_v2_0 *v2_0_hdr; +	const struct smc_firmware_header_v2_1 *v2_1_hdr;  	DRM_DEBUG("SMC\n");  	amdgpu_ucode_print_common_hdr(hdr);  	if (version_major == 1) { -		const struct smc_firmware_header_v1_0 *smc_hdr = -			container_of(hdr, struct smc_firmware_header_v1_0, header); - -		DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr)); +		v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, header); +		DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(v1_0_hdr->ucode_start_addr));  	} else if (version_major == 2) { -		const struct smc_firmware_header_v1_0 *v1_hdr = -			container_of(hdr, struct smc_firmware_header_v1_0, header); -		const struct smc_firmware_header_v2_0 *v2_hdr = -			container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); +		switch (version_minor) { +		case 0: +			v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header); +			DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes)); +			DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes)); +			break; +		case 1: +			v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header); +			DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count)); +			DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset)); +			break; +		default: +			break; +		} -		DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); -		DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes));  	} else {  		DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor);  	} @@ -391,6 +400,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		if (!load_type)  			return AMDGPU_FW_LOAD_DIRECT;  		else @@ -586,8 +597,8 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,  {  	const struct gfx_firmware_header_v1_0 *header = NULL;  	const struct common_firmware_header *comm_hdr = NULL; -	uint8_t* src_addr = NULL; -	uint8_t* dst_addr = NULL; +	uint8_t *src_addr = NULL; +	uint8_t *dst_addr = NULL;  	if (NULL == ucode->fw)  		return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 262baf0f61ea..a2975c8092a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -126,10 +126,11 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,  				err_data->ue_count);  		if ((amdgpu_bad_page_threshold != 0) && -			err_data->err_addr_cnt && +			err_data->err_addr_cnt) {  			amdgpu_ras_add_bad_pages(adev, err_data->err_addr, -	
					err_data->err_addr_cnt)) -			dev_warn(adev->dev, "Failed to add ras bad page!\n"); +						err_data->err_addr_cnt); +			amdgpu_ras_save_bad_pages(adev); +		}  		amdgpu_ras_reset_gpu(adev);  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index f8bebf18ee36..8b989670ed66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -87,7 +87,7 @@  #define UVD_NO_OP				0x03ff  #define UVD_BASE_SI				0x3800 -/** +/*   * amdgpu_uvd_cs_ctx - Command submission parser context   *   * Used for emulating virtual memory support on UVD 4.2. @@ -240,7 +240,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;  		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; -		DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", +		DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",  			version_major, version_minor, family_id);  		/* @@ -267,7 +267,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  		dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;  		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;  		enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; -		DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n", +		DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",  			enc_major, enc_minor, dec_minor, family_id);  		adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; @@ -545,8 +545,9 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)  /**   * amdgpu_uvd_cs_msg_decode - handle UVD decode message   * + * @adev: amdgpu_device pointer   * @msg: pointer to message structure - * @buf_sizes: returned buffer sizes + * @buf_sizes: placeholder to put the different buffer lengths   *   * Peek into the decode message and calculate the necessary buffer sizes.   */ @@ -1005,6 +1006,7 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,   * amdgpu_uvd_ring_parse_cs - UVD command submission parser   *   * @parser: Command submission parser context + * @ib_idx: Which indirect buffer to use   *   * Parse the command stream, patch in addresses as necessary.   
*/ @@ -1279,6 +1281,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)   * amdgpu_uvd_ring_test_ib - test ib execution   *   * @ring: amdgpu_ring pointer + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test if we can successfully execute an IB   */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 5eb63288d157..edbb8194ee81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -67,6 +67,7 @@ struct amdgpu_uvd {  	unsigned		harvest_config;  	/* store image width to adjust nb memory state */  	unsigned		decode_image_width; +	uint32_t                keyselect;  };  int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ecaa2d7483b2..0d5284b936e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -90,6 +90,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,   * amdgpu_vce_init - allocate memory, load vce firmware   *   * @adev: amdgpu_device pointer + * @size: size for the new BO   *   * First step to get VCE online, allocate memory and load the firmware   */ @@ -178,7 +179,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)  	version_major = (ucode_version >> 20) & 0xfff;  	version_minor = (ucode_version >> 8) & 0xfff;  	binary_id = ucode_version & 0xff; -	DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n", +	DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",  		version_major, version_minor, binary_id);  	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |  				(binary_id << 8)); @@ -428,9 +429,9 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)  /**   * amdgpu_vce_get_create_msg - generate a VCE create msg   * - * @adev: amdgpu_device pointer   * @ring: ring we should submit the msg to   * @handle: VCE session handle to use + * @bo: amdgpu object for which we query the offset   * @fence: optional fence to return   *   * Open up a stream for HW test @@ -509,9 +510,9 @@ err:  /**   * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg   * - * @adev: amdgpu_device pointer   * @ring: ring we should submit the msg to   * @handle: VCE session handle to use + * @direct: direct or delayed pool   * @fence: optional fence to return   *   * Close up a stream for HW test or if userspace failed to do so @@ -576,6 +577,7 @@ err:   * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary   *   * @p: parser context + * @ib_idx: indirect buffer to use   * @lo: address of lower dword   * @hi: address of higher dword   * @size: minimum size @@ -625,9 +627,11 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,   * amdgpu_vce_cs_reloc - command submission relocation   *   * @p: parser context + * @ib_idx: indirect buffer to use   * @lo: address of lower dword   * @hi: address of higher dword   * @size: minimum size + * @index: bs/fb index   *   * Patch relocation inside command stream with real buffer address   */ @@ -714,7 +718,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,   * amdgpu_vce_cs_parse - parse and validate the command stream   *   * @p: parser context - * + * @ib_idx: indirect buffer to use   */  int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)  { @@ -950,7 +954,7 @@ out:   * amdgpu_vce_cs_parse_vm 
- parse the command stream in VM mode   *   * @p: parser context - * + * @ib_idx: indirect buffer to use   */  int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)  { @@ -1040,7 +1044,9 @@ out:   * amdgpu_vce_ring_emit_ib - execute indirect buffer   *   * @ring: engine to use + * @job: job to retrieve vmid from   * @ib: the IB to execute + * @flags: unused   *   */  void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, @@ -1058,7 +1064,9 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,   * amdgpu_vce_ring_emit_fence - add a fence command to the ring   *   * @ring: engine to use - * @fence: the fence + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   */  void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -1116,6 +1124,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)   * amdgpu_vce_ring_test_ib - test if VCE IBs are working   *   * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   */  int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f3b7287e84c4..4a77c7424dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -37,24 +37,30 @@  #define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"  #define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"  #define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin" -#define FIRMWARE_ARCTURUS 	"amdgpu/arcturus_vcn.bin" -#define FIRMWARE_RENOIR 	"amdgpu/renoir_vcn.bin" -#define FIRMWARE_NAVI10 	"amdgpu/navi10_vcn.bin" -#define FIRMWARE_NAVI14 	"amdgpu/navi14_vcn.bin" -#define FIRMWARE_NAVI12 	"amdgpu/navi12_vcn.bin" -#define FIRMWARE_SIENNA_CICHLID 	"amdgpu/sienna_cichlid_vcn.bin" -#define FIRMWARE_NAVY_FLOUNDER 	"amdgpu/navy_flounder_vcn.bin" +#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin" +#define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin" +#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin" +#define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin" +#define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin" +#define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin" +#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"  MODULE_FIRMWARE(FIRMWARE_RAVEN);  MODULE_FIRMWARE(FIRMWARE_PICASSO);  MODULE_FIRMWARE(FIRMWARE_RAVEN2);  MODULE_FIRMWARE(FIRMWARE_ARCTURUS);  MODULE_FIRMWARE(FIRMWARE_RENOIR); +MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);  MODULE_FIRMWARE(FIRMWARE_NAVI10);  MODULE_FIRMWARE(FIRMWARE_NAVI14);  MODULE_FIRMWARE(FIRMWARE_NAVI12);  MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);  MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); +MODULE_FIRMWARE(FIRMWARE_VANGOGH); +MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);  static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -89,7 +95,11 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  			adev->vcn.indirect_sram = true;  		break;  	case CHIP_RENOIR: -		fw_name = FIRMWARE_RENOIR; +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			fw_name = FIRMWARE_RENOIR; +		else +			fw_name = FIRMWARE_GREEN_SARDINE; +  		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&  		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))  			adev->vcn.indirect_sram = true; @@ -124,6 +134,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  		    (adev->pg_flags & 
AMD_PG_SUPPORT_VCN_DPG))  			adev->vcn.indirect_sram = true;  		break; +	case CHIP_VANGOGH: +		fw_name = FIRMWARE_VANGOGH; +		break; +	case CHIP_DIMGREY_CAVEFISH: +		fw_name = FIRMWARE_DIMGREY_CAVEFISH; +		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && +		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) +			adev->vcn.indirect_sram = true; +		break;  	default:  		return -EINVAL;  	} @@ -162,7 +181,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  		enc_major = fw_check;  		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;  		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; -		DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n", +		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",  			enc_major, enc_minor, dec_ver, vep, fw_rev);  	} else {  		unsigned int version_major, version_minor, family_id; @@ -170,7 +189,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;  		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;  		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; -		DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", +		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",  			version_major, version_minor, family_id);  	} @@ -313,6 +332,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  		container_of(work, struct amdgpu_device, vcn.idle_work.work);  	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};  	unsigned int i, j; +	int r = 0;  	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {  		if (adev->vcn.harvest_config & (1 << j)) @@ -339,8 +359,13 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  	}  	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) { +		amdgpu_gfx_off_ctrl(adev, true);  		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,  		       AMD_PG_STATE_GATE); +		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, +				false); +		if (r) +			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);  	} else {  		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);  	} @@ -349,9 +374,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; +	int r = 0;  	atomic_inc(&adev->vcn.total_submission_cnt); -	cancel_delayed_work_sync(&adev->vcn.idle_work); + +	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) { +		amdgpu_gfx_off_ctrl(adev, false); +		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, +				true); +		if (r) +			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); +	}  	mutex_lock(&adev->vcn.vcn_pg_lock);  	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -423,6 +456,37 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)  	return r;  } +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t rptr; +	unsigned int i; +	int r; + +	if (amdgpu_sriov_vf(adev)) +		return 0; + +	r = amdgpu_ring_alloc(ring, 16); +	if (r) +		return r; + +	rptr = amdgpu_ring_get_rptr(ring); + +	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); +	amdgpu_ring_commit(ring); + +	for (i = 0; i < adev->usec_timeout; i++) { +		if (amdgpu_ring_get_rptr(ring) != rptr) +			break; +		udelay(1); +	} + +	if (i >= adev->usec_timeout) +		r = 
-ETIMEDOUT; + +	return r; +} +  static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,  				   struct amdgpu_bo *bo,  				   struct dma_fence **fence) @@ -477,16 +541,16 @@ err:  }  static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, -			      struct dma_fence **fence) +					 struct amdgpu_bo **bo)  {  	struct amdgpu_device *adev = ring->adev; -	struct amdgpu_bo *bo = NULL;  	uint32_t *msg;  	int r, i; +	*bo = NULL;  	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,  				      AMDGPU_GEM_DOMAIN_VRAM, -				      &bo, NULL, (void **)&msg); +				      bo, NULL, (void **)&msg);  	if (r)  		return r; @@ -507,20 +571,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand  	for (i = 14; i < 1024; ++i)  		msg[i] = cpu_to_le32(0x0); -	return amdgpu_vcn_dec_send_msg(ring, bo, fence); +	return 0;  }  static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, -			       struct dma_fence **fence) +					  struct amdgpu_bo **bo)  {  	struct amdgpu_device *adev = ring->adev; -	struct amdgpu_bo *bo = NULL;  	uint32_t *msg;  	int r, i; +	*bo = NULL;  	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,  				      AMDGPU_GEM_DOMAIN_VRAM, -				      &bo, NULL, (void **)&msg); +				      bo, NULL, (void **)&msg);  	if (r)  		return r; @@ -533,19 +597,117 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han  	for (i = 6; i < 1024; ++i)  		msg[i] = cpu_to_le32(0x0); -	return amdgpu_vcn_dec_send_msg(ring, bo, fence); +	return 0;  }  int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)  { -	struct dma_fence *fence; +	struct dma_fence *fence = NULL; +	struct amdgpu_bo *bo;  	long r; -	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); +	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); +	if (r) +		goto error; + +	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL); +	if (r) +		goto error; +	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo); +	if (r) +		goto error; + +	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence); +	if (r) +		goto error; + +	r = dma_fence_wait_timeout(fence, false, timeout); +	if (r == 0) +		r = -ETIMEDOUT; +	else if (r > 0) +		r = 0; + +	dma_fence_put(fence); +error: +	return r; +} + +static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, +				   struct amdgpu_bo *bo, +				   struct dma_fence **fence) +{ +	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; +	const unsigned int ib_size_dw = 64; +	struct amdgpu_device *adev = ring->adev; +	struct dma_fence *f = NULL; +	struct amdgpu_job *job; +	struct amdgpu_ib *ib; +	uint64_t addr; +	int i, r; + +	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, +				AMDGPU_IB_POOL_DIRECT, &job); +	if (r) +		goto err; + +	ib = &job->ibs[0]; +	addr = amdgpu_bo_gpu_offset(bo); +	ib->length_dw = 0; + +	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; +	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); +	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); +	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; +	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + +	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); +	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); +	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); + +	for (i = ib->length_dw; i < ib_size_dw; ++i) +		ib->ptr[i] = 0x0; + +	r = amdgpu_job_submit_direct(job, ring, &f); +	if (r) +		goto err_free; + +	
amdgpu_bo_fence(bo, f, false); +	amdgpu_bo_unreserve(bo); +	amdgpu_bo_unref(&bo); + +	if (fence) +		*fence = dma_fence_get(f); +	dma_fence_put(f); + +	return 0; + +err_free: +	amdgpu_job_free(job); + +err: +	amdgpu_bo_unreserve(bo); +	amdgpu_bo_unref(&bo); +	return r; +} + +int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ +	struct dma_fence *fence = NULL; +	struct amdgpu_bo *bo; +	long r; + +	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); +	if (r) +		goto error; + +	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL); +	if (r) +		goto error; +	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);  	if (r)  		goto error; -	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); +	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);  	if (r)  		goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 17691158f783..13aa417f6be7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -44,6 +44,17 @@  #define VCN_DEC_CMD_PACKET_START	0x0000000a  #define VCN_DEC_CMD_PACKET_END		0x0000000b +#define VCN_DEC_SW_CMD_NO_OP		0x00000000 +#define VCN_DEC_SW_CMD_END		0x00000001 +#define VCN_DEC_SW_CMD_IB		0x00000002 +#define VCN_DEC_SW_CMD_FENCE		0x00000003 +#define VCN_DEC_SW_CMD_TRAP		0x00000004 +#define VCN_DEC_SW_CMD_IB_AUTO		0x00000005 +#define VCN_DEC_SW_CMD_SEMAPHORE	0x00000006 +#define VCN_DEC_SW_CMD_PREEMPT_FENCE	0x00000009 +#define VCN_DEC_SW_CMD_REG_WRITE	0x0000000b +#define VCN_DEC_SW_CMD_REG_WAIT		0x0000000c +  #define VCN_ENC_CMD_NO_OP		0x00000000  #define VCN_ENC_CMD_END 		0x00000001  #define VCN_ENC_CMD_IB			0x00000002 @@ -145,6 +156,10 @@  	} while (0)  #define AMDGPU_VCN_MULTI_QUEUE_FLAG	(1 << 8) +#define AMDGPU_VCN_SW_RING_FLAG		(1 << 9) + +#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER	0x00000001 +#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER		0x00000001  enum fw_queue_mode {  	FW_QUEUE_RING_RESET = 1, @@ -236,12 +251,25 @@ struct amdgpu_fw_shared_multi_queue {  	uint8_t padding[4];  }; +struct amdgpu_fw_shared_sw_ring { +	uint8_t is_enabled; +	uint8_t padding[3]; +}; +  struct amdgpu_fw_shared {  	uint32_t present_flag_0;  	uint8_t pad[53];  	struct amdgpu_fw_shared_multi_queue multi_queue; +	struct amdgpu_fw_shared_sw_ring sw_ring;  } __attribute__((__packed__)); +struct amdgpu_vcn_decode_buffer { +	uint32_t valid_buf_flag; +	uint32_t msg_buffer_address_hi; +	uint32_t msg_buffer_address_lo; +	uint32_t pad[30]; +}; +  int amdgpu_vcn_sw_init(struct amdgpu_device *adev);  int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);  int amdgpu_vcn_suspend(struct amdgpu_device *adev); @@ -251,6 +279,8 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);  int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);  int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);  int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);  int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c index 7f7097931c6f..f9d3d79f68b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -59,7 +59,7 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)  		return;  	}  /* - 	TODO: Enable these code when pv2vf_info is merged +	TODO: Enable these code when 
pv2vf_info is merged
 	AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags);
 	if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) {
 		return;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index d0aea5e39531..2d51b7694d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -47,11 +47,13 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
 
 void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 {
+	struct drm_device *ddev = adev_to_drm(adev);
+
 	/* enable virtual display */
 	if (adev->mode_info.num_crtc == 0)
 		adev->mode_info.num_crtc = 1;
 	adev->enable_virtual_display = true;
-	adev_to_drm(adev)->driver->driver_features &= ~DRIVER_ATOMIC;
+	ddev->driver_features &= ~DRIVER_ATOMIC;
 	adev->cg_flags = 0;
 	adev->pg_flags = 0;
 }
@@ -104,7 +106,7 @@ failed_kiq:
 
 /**
  * amdgpu_virt_request_full_gpu() - request full gpu access
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
  * @init:	is driver init time.
  * When start to init/fini driver, first need to request full gpu access.
  * Return: Zero if request success, otherwise will return error.
@@ -127,7 +129,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
 
 /**
  * amdgpu_virt_release_full_gpu() - release full gpu access
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
  * @init:	is driver init time.
  * When finishing driver init/fini, need to release full gpu access.
  * Return: Zero if release success, otherwise will return error.
@@ -149,7 +151,7 @@ int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
 
 /**
  * amdgpu_virt_reset_gpu() - reset gpu
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
  * Send reset command to GPU hypervisor to reset GPU that VM is using
 * Return: Zero if reset success, otherwise will return error.
 */
@@ -184,7 +186,7 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
 
 /**
  * amdgpu_virt_wait_reset() - wait for reset gpu completed
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
  * Wait for GPU reset completed.
 * Return: Zero if reset success, otherwise will return error.
 */
@@ -200,7 +202,7 @@ int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
 
 /**
  * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
  * MM table is used by UVD and VCE for its initialization
 * Return: Zero if allocate success.
 */
@@ -230,7 +232,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
 
 /**
  * amdgpu_virt_free_mm_table() - free mm table memory
- * @amdgpu:	amdgpu device.
+ * @adev:	amdgpu device.
* Free MM table memory   */  void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) @@ -280,8 +282,8 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)  	if (!*data)  		return -ENOMEM; -	bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL); -	bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL); +	bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); +	bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL);  	if (!bps || !bps_bo) {  		kfree(bps); @@ -555,7 +557,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)  	return 0;  } -void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) +static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)  {  	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index df110afa97bf..0768c8686983 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -300,7 +300,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)  static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)  {  	spin_lock(&vm_bo->vm->invalidated_lock); -	list_del_init(&vm_bo->vm_status); +	list_move(&vm_bo->vm_status, &vm_bo->vm->done);  	spin_unlock(&vm_bo->vm->invalidated_lock);  } @@ -609,7 +609,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)  	if (!amdgpu_bo_is_amdgpu_bo(bo))  		return; -	if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) +	if (bo->pin_count)  		return;  	abo = ttm_to_amdgpu_bo(bo); @@ -1570,7 +1570,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,  /**   * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table   * - * @adev: amdgpu_device pointer + * @adev: amdgpu_device pointer of the VM + * @bo_adev: amdgpu_device pointer of the mapped BO   * @vm: requested vm   * @immediate: immediate submission in a page fault   * @unlocked: unlocked invalidation during MM callback @@ -1578,7 +1579,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,   * @start: start of mapped range   * @last: last mapped entry   * @flags: flags for the entries - * @addr: addr to set the area to + * @offset: offset into nodes and pages_addr + * @nodes: array of drm_mm_nodes with the MC addresses   * @pages_addr: DMA addresses to use for mapping   * @fence: optional resulting fence   * @@ -1588,15 +1590,18 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,   * 0 for success, -EINVAL for failure.   
*/  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, +				       struct amdgpu_device *bo_adev,  				       struct amdgpu_vm *vm, bool immediate,  				       bool unlocked, struct dma_resv *resv,  				       uint64_t start, uint64_t last, -				       uint64_t flags, uint64_t addr, +				       uint64_t flags, uint64_t offset, +				       struct drm_mm_node *nodes,  				       dma_addr_t *pages_addr,  				       struct dma_fence **fence)  {  	struct amdgpu_vm_update_params params;  	enum amdgpu_sync_mode sync_mode; +	uint64_t pfn;  	int r;  	memset(¶ms, 0, sizeof(params)); @@ -1614,6 +1619,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	else  		sync_mode = AMDGPU_SYNC_EXPLICIT; +	pfn = offset >> PAGE_SHIFT; +	if (nodes) { +		while (pfn >= nodes->size) { +			pfn -= nodes->size; +			++nodes; +		} +	} +  	amdgpu_vm_eviction_lock(vm);  	if (vm->evicting) {  		r = -EBUSY; @@ -1632,105 +1645,47 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	if (r)  		goto error_unlock; -	r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); -	if (r) -		goto error_unlock; - -	r = vm->update_funcs->commit(¶ms, fence); - -error_unlock: -	amdgpu_vm_eviction_unlock(vm); -	return r; -} - -/** - * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks - * - * @adev: amdgpu_device pointer - * @resv: fences we need to sync to - * @pages_addr: DMA addresses to use for mapping - * @vm: requested vm - * @mapping: mapped range and flags to use for the update - * @flags: HW flags for the mapping - * @bo_adev: amdgpu_device pointer that bo actually been allocated - * @nodes: array of drm_mm_nodes with the MC addresses - * @fence: optional resulting fence - * - * Split the mapping into smaller chunks so that each update fits - * into a SDMA IB. - * - * Returns: - * 0 for success, -EINVAL for failure. 
- */ -static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, -				      struct dma_resv *resv, -				      dma_addr_t *pages_addr, -				      struct amdgpu_vm *vm, -				      struct amdgpu_bo_va_mapping *mapping, -				      uint64_t flags, -				      struct amdgpu_device *bo_adev, -				      struct drm_mm_node *nodes, -				      struct dma_fence **fence) -{ -	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; -	uint64_t pfn, start = mapping->start; -	int r; - -	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here -	 * but in case of something, we filter the flags in first place -	 */ -	if (!(mapping->flags & AMDGPU_PTE_READABLE)) -		flags &= ~AMDGPU_PTE_READABLE; -	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) -		flags &= ~AMDGPU_PTE_WRITEABLE; - -	/* Apply ASIC specific mapping flags */ -	amdgpu_gmc_get_vm_pte(adev, mapping, &flags); - -	trace_amdgpu_vm_bo_update(mapping); - -	pfn = mapping->offset >> PAGE_SHIFT; -	if (nodes) { -		while (pfn >= nodes->size) { -			pfn -= nodes->size; -			++nodes; -		} -	} -  	do { -		dma_addr_t *dma_addr = NULL; -		uint64_t max_entries; -		uint64_t addr, last; +		uint64_t tmp, num_entries, addr; + -		max_entries = mapping->last - start + 1; +		num_entries = last - start + 1;  		if (nodes) {  			addr = nodes->start << PAGE_SHIFT; -			max_entries = min((nodes->size - pfn) * -				AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries); +			num_entries = min((nodes->size - pfn) * +				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);  		} else {  			addr = 0;  		}  		if (pages_addr) { -			uint64_t count; +			bool contiguous = true; -			for (count = 1; -			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; -			     ++count) { -				uint64_t idx = pfn + count; +			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { +				uint64_t count; -				if (pages_addr[idx] != -				    (pages_addr[idx - 1] + PAGE_SIZE)) -					break; +				contiguous = pages_addr[pfn + 1] == +					pages_addr[pfn] + PAGE_SIZE; + +				tmp = num_entries / +					AMDGPU_GPU_PAGES_IN_CPU_PAGE; +				for (count = 2; count < tmp; ++count) { +					uint64_t idx = pfn + count; + +					if (contiguous != (pages_addr[idx] == +					    pages_addr[idx - 1] + PAGE_SIZE)) +						break; +				} +				num_entries = count * +					AMDGPU_GPU_PAGES_IN_CPU_PAGE;  			} -			if (count < min_linear_pages) { +			if (!contiguous) {  				addr = pfn << PAGE_SHIFT; -				dma_addr = pages_addr; +				params.pages_addr = pages_addr;  			} else {  				addr = pages_addr[pfn]; -				max_entries = count * -					AMDGPU_GPU_PAGES_IN_CPU_PAGE; +				params.pages_addr = NULL;  			}  		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { @@ -1738,23 +1693,25 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  			addr += pfn << PAGE_SHIFT;  		} -		last = start + max_entries - 1; -		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, -						start, last, flags, addr, -						dma_addr, fence); +		tmp = start + num_entries; +		r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags);  		if (r) -			return r; +			goto error_unlock; -		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE; +		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;  		if (nodes && nodes->size == pfn) {  			pfn = 0;  			++nodes;  		} -		start = last + 1; +		start = tmp; -	} while (unlikely(start != mapping->last + 1)); +	} while (unlikely(start != last + 1)); -	return 0; +	r = vm->update_funcs->commit(¶ms, fence); + +error_unlock: +	amdgpu_vm_eviction_unlock(vm); +	return r;  }  /** @@ -1790,7 +1747,6 @@ int 
amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		resv = vm->root.base.bo->tbo.base.resv;
 	} else {
 		struct drm_gem_object *obj = &bo->tbo.base;
-		struct ttm_dma_tt *ttm;
 
 		resv = bo->tbo.base.resv;
 		if (obj->import_attach && bo_va->is_xgmi) {
@@ -1803,10 +1759,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		}
 		mem = &bo->tbo.mem;
 		nodes = mem->mm_node;
-		if (mem->mem_type == TTM_PL_TT) {
-			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
-			pages_addr = ttm->dma_address;
-		}
+		if (mem->mem_type == TTM_PL_TT)
+			pages_addr = bo->tbo.ttm->dma_address;
 	}
 
 	if (bo) {
@@ -1835,9 +1789,26 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
-					       mapping, flags, bo_adev, nodes,
-					       last_update);
+		uint64_t update_flags = flags;
+
+		/* Normally bo_va->flags only contains READABLE and WRITEABLE
+		 * bits here, but filter the flags first just in case anything
+		 * else slipped in.
+		 */
+		if (!(mapping->flags & AMDGPU_PTE_READABLE))
+			update_flags &= ~AMDGPU_PTE_READABLE;
+		if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+			update_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		/* Apply ASIC specific mapping flags */
+		amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+
+		trace_amdgpu_vm_bo_update(mapping);
+
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+						resv, mapping->start,
+						mapping->last, update_flags,
+						mapping->offset, nodes,
+						pages_addr, last_update);
 		if (r)
 			return r;
 	}
@@ -2045,9 +2016,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
-						mapping->start, mapping->last,
-						init_pte_value, 0, NULL, &f);
+		r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
+						resv, mapping->start,
+						mapping->last, init_pte_value,
+						0, NULL, NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
 		if (r) {
 			dma_fence_put(f);
@@ -2166,7 +2138,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 
 /**
- * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ * amdgpu_vm_bo_insert_map - insert a new mapping
  *
  * @adev: amdgpu_device pointer
  * @bo_va: bo_va to store the address
@@ -2823,7 +2795,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&vm->invalidated);
 	spin_lock_init(&vm->invalidated_lock);
 	INIT_LIST_HEAD(&vm->freed);
-
+	INIT_LIST_HEAD(&vm->done);
 
 	/* create scheduler entities for page table updates */
 	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -3375,8 +3347,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 		value = 0;
 	}
 
-	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
-					addr + 1, flags, value, NULL, NULL);
+	r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
+					addr, flags, value, NULL, NULL,
+					NULL);
 	if (r)
 		goto error_unlock;
@@ -3392,3 +3365,99 @@ error_unref:
 	return false;
 }
+
+#if defined(CONFIG_DEBUG_FS)
+/**
+ * amdgpu_debugfs_vm_bo_info - print BO info for the VM
+ *
+ * @vm: Requested VM for printing BO info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file for the VM
+ */
+void 
amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) +{ +	struct amdgpu_bo_va *bo_va, *tmp; +	u64 total_idle = 0; +	u64 total_evicted = 0; +	u64 total_relocated = 0; +	u64 total_moved = 0; +	u64 total_invalidated = 0; +	u64 total_done = 0; +	unsigned int total_idle_objs = 0; +	unsigned int total_evicted_objs = 0; +	unsigned int total_relocated_objs = 0; +	unsigned int total_moved_objs = 0; +	unsigned int total_invalidated_objs = 0; +	unsigned int total_done_objs = 0; +	unsigned int id = 0; + +	seq_puts(m, "\tIdle BOs:\n"); +	list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m); +	} +	total_idle_objs = id; +	id = 0; + +	seq_puts(m, "\tEvicted BOs:\n"); +	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m); +	} +	total_evicted_objs = id; +	id = 0; + +	seq_puts(m, "\tRelocated BOs:\n"); +	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); +	} +	total_relocated_objs = id; +	id = 0; + +	seq_puts(m, "\tMoved BOs:\n"); +	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m); +	} +	total_moved_objs = id; +	id = 0; + +	seq_puts(m, "\tInvalidated BOs:\n"); +	spin_lock(&vm->invalidated_lock); +	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_invalidated += amdgpu_bo_print_info(id++,	bo_va->base.bo, m); +	} +	total_invalidated_objs = id; +	id = 0; + +	seq_puts(m, "\tDone BOs:\n"); +	list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) { +		if (!bo_va->base.bo) +			continue; +		total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); +	} +	spin_unlock(&vm->invalidated_lock); +	total_done_objs = id; + +	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle, +		   total_idle_objs); +	seq_printf(m, "\tTotal evicted size:     %12lld\tobjs:\t%d\n", total_evicted, +		   total_evicted_objs); +	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated, +		   total_relocated_objs); +	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved, +		   total_moved_objs); +	seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated, +		   total_invalidated_objs); +	seq_printf(m, "\tTotal done size:        %12lld\tobjs:\t%d\n", total_done, +		   total_done_objs); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 58c83a7ad0fd..976a12e5a8b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -76,6 +76,9 @@ struct amdgpu_bo_list_entry;  /* PTE is handled as PDE for VEGA10 (Translate Further) */  #define AMDGPU_PTE_TF		(1ULL << 56) +/* MALL noalloc for sienna_cichlid, reserved for older ASICs  */ +#define AMDGPU_PTE_NOALLOC	(1ULL << 58) +  /* PDE Block Fragment Size for VEGA10 */  #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59) @@ -104,7 +107,7 @@ struct amdgpu_bo_list_entry;  #define AMDGPU_VM_FAULT_STOP_ALWAYS	2  /* Reserve 4MB VRAM for page tables */ -#define AMDGPU_VM_RESERVED_VRAM		(4ULL << 20) +#define AMDGPU_VM_RESERVED_VRAM		(8ULL << 20)  /* max number of VMHUB */ 
 #define AMDGPU_MAX_VMHUBS			3
@@ -274,6 +277,9 @@ struct amdgpu_vm {
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
+	/* BOs which are invalidated and have been updated in the PTs */
+	struct list_head        done;
+
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_update;
@@ -441,4 +447,8 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm);
 void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
 
+#if defined(CONFIG_DEBUG_FS)
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 0786e7555554..ac45d9c7a4e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -38,8 +38,8 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table)
  * amdgpu_vm_cpu_prepare - prepare page table update with the CPU
  *
  * @p: see amdgpu_vm_update_params definition
- * @owner: owner we need to sync to
- * @exclusive: exclusive move fence we need to sync to
+ * @resv: reservation object with embedded fence
+ * @sync_mode: synchronization mode
  *
  * Returns:
  * Negative errno, 0 for success.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index db790574dc2e..a83a646759c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -51,8 +51,8 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table)
  * amdgpu_vm_sdma_prepare - prepare SDMA command submission
  *
  * @p: see amdgpu_vm_update_params definition
- * @owner: owner we need to sync to
- * @exclusive: exclusive move fence we need to sync to
+ * @resv: reservation object with embedded fence
+ * @sync_mode: synchronization mode
  *
  * Returns:
  * Negative errno, 0 for success.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 01c1171afbe0..d2de2a720a3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -168,8 +168,7 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
 /**
  * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
  *
- * @man: TTM memory type manager
- * @p_size: maximum size of VRAM
+ * @adev: amdgpu_device pointer
  *
  * Allocate and initialize the VRAM manager.
 */
@@ -185,6 +184,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 	drm_mm_init(&mgr->mm, 0, man->size);
 	spin_lock_init(&mgr->lock);
+	INIT_LIST_HEAD(&mgr->reservations_pending);
+	INIT_LIST_HEAD(&mgr->reserved_pages);
 
 	/* Add the two VRAM-related sysfs files */
 	ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
@@ -199,7 +200,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 /**
  * amdgpu_vram_mgr_fini - free and destroy VRAM manager
  *
- * @man: TTM memory type manager
+ * @adev: amdgpu_device pointer
 *
 * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
 * allocated inside it.
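The two INIT_LIST_HEAD() calls above wire up a deferred-reservation scheme that the hunks below complete: amdgpu_vram_mgr_reserve_range() queues a node on reservations_pending, amdgpu_vram_mgr_do_reserve() migrates it to reserved_pages once drm_mm can actually carve the range out, and amdgpu_vram_mgr_query_page_status() reports which list a page currently sits on. A minimal caller sketch, built around a hypothetical retire_bad_page() helper that is not part of this patch:

	/* Hypothetical caller, e.g. RAS bad-page retirement (illustrative only). */
	static int retire_bad_page(struct ttm_resource_manager *man, uint64_t addr)
	{
		int r;

		/* Queue the page; it lands on reservations_pending first. */
		r = amdgpu_vram_mgr_reserve_range(man, addr, PAGE_SIZE);
		if (r)
			return r;	/* -ENOMEM: no tracking node */

		/* reserve_range stores page numbers (addr >> PAGE_SHIFT), and
		 * query_page_status compares against those same units. */
		r = amdgpu_vram_mgr_query_page_status(man, addr >> PAGE_SHIFT);
		if (r == -EBUSY)
			return 0;	/* still pending; retried when VRAM frees up */

		return r;		/* 0: reserved, -ENOENT: never queued */
	}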
@@ -209,14 +210,22 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
 	int ret;
+	struct amdgpu_vram_reservation *rsv, *temp;
 
 	ttm_resource_manager_set_used(man, false);
 
-	ret = ttm_resource_manager_force_list_clean(&adev->mman.bdev, man);
+	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
 	if (ret)
 		return;
 
 	spin_lock(&mgr->lock);
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+		kfree(rsv);
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+		drm_mm_remove_node(&rsv->mm_node);
+		kfree(rsv);
+	}
 	drm_mm_takedown(&mgr->mm);
 	spin_unlock(&mgr->lock);
@@ -229,7 +238,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 /**
  * amdgpu_vram_mgr_vis_size - Calculate visible node size
  *
- * @adev: amdgpu device structure
+ * @adev: amdgpu_device pointer
  * @node: MM node structure
  *
  * Calculate how many bytes of the MM node are inside visible VRAM
@@ -275,6 +284,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	return usage;
 }
 
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_mm *mm = &mgr->mm;
+	struct amdgpu_vram_reservation *rsv, *temp;
+	uint64_t vis_usage;
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
+		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+			continue;
+
+		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+			rsv->mm_node.start, rsv->mm_node.size);
+
+		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+		atomic64_add(vis_usage, &mgr->vis_usage);
+		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
+		list_move(&rsv->node, &mgr->reserved_pages);
+	}
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @man: TTM memory type manager
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+				  uint64_t start, uint64_t size)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+
+	rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+	if (!rsv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&rsv->node);
+	rsv->mm_node.start = start >> PAGE_SHIFT;
+	rsv->mm_node.size = size >> PAGE_SHIFT;
+
+	spin_lock(&mgr->lock);
+	list_add_tail(&rsv->node, &mgr->reservations_pending);
+	amdgpu_vram_mgr_do_reserve(man);
+	spin_unlock(&mgr->lock);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @man: TTM memory type manager
+ * @start: start address of a page in VRAM
+ *
+ * Returns:
+ *	-EBUSY: the page is still held and in the pending list
+ *	0: the page has been reserved
+ *	-ENOENT: the input page is not a reservation
+ */
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+				      uint64_t start)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+	int ret;
+
+	spin_lock(&mgr->lock);
+
+	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
+		if 
((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
+		if ((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	ret = -ENOENT;
+out:
+	spin_unlock(&mgr->lock);
+	return ret;
+}
+
 /**
  * amdgpu_vram_mgr_virt_start - update virtual start address
  *
@@ -445,6 +549,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
 		++nodes;
 	}
+	amdgpu_vram_mgr_do_reserve(man);
 	spin_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
@@ -529,9 +634,11 @@ error_free:
 }
 
 /**
- * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
+ * amdgpu_vram_mgr_free_sgt - free a previously allocated sg table
  *
 * @adev: amdgpu device pointer
+ * @dev: device pointer
+ * @dir: data direction of resource to unmap
 * @sgt: sg table to free
 *
 * Free a previously allocated sg table.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 1162913c8bf4..541ef6be390f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -246,7 +246,7 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
 
 	adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", error_count);
+	return snprintf(buf, PAGE_SIZE, "%u\n", error_count);
 }
 
@@ -395,12 +395,17 @@ void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 {
 	int ret = 0;
-	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
-	struct amdgpu_device *request_adev = hive->hi_req_gpu ?
-						hive->hi_req_gpu : adev;
+	struct amdgpu_hive_info *hive;
+	struct amdgpu_device *request_adev;
 	bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
-	bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
+	bool init_low;
+
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		return 0;
+	request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev;
+	init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
 	amdgpu_put_xgmi_hive(hive);
 	/* fw bug so temporarily disable pstate switching */
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
index 939eca63b094..66c183ddd43e 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -74,6 +74,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
 	switch (adev->asic_type) {
 	case CHIP_SIENNA_CICHLID:
 	case CHIP_NAVY_FLOUNDER:
+	case CHIP_DIMGREY_CAVEFISH:
 		athub_v2_1_update_medium_grain_clock_gating(adev,
				state == AMD_CG_STATE_GATE ? 
true : false);  		athub_v2_1_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 4cfc786699c7..515890f4f5a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -66,13 +66,13 @@ typedef struct {  	bool abort;  } atom_exec_context; -int amdgpu_atom_debug = 0; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t * params); +int amdgpu_atom_debug; +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params);  static uint32_t atom_arg_mask[8] = -    { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, -0xFF000000 }; +	{ 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, +	  0xFF000000 };  static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };  static int atom_dst_to_src[8][4] = { @@ -88,7 +88,7 @@ static int atom_dst_to_src[8][4] = {  };  static int atom_def_dst[8] = { 0, 0, 1, 2, 0, 1, 2, 3 }; -static int debug_depth = 0; +static int debug_depth;  #ifdef ATOM_DEBUG  static void debug_print_spaces(int n)  { @@ -1201,7 +1201,7 @@ static struct {  	atom_op_div32, ATOM_ARG_WS},  }; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params) +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params)  {  	int base = CU16(ctx->cmd_table + 4 + 2 * index);  	int len, ws, ps, ptr; @@ -1262,7 +1262,7 @@ free:  	return ret;  } -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t * params) +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params)  {  	int r; @@ -1388,8 +1388,8 @@ void amdgpu_atom_destroy(struct atom_context *ctx)  }  bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, -			    uint16_t * size, uint8_t * frev, uint8_t * crev, -			    uint16_t * data_start) +			    uint16_t *size, uint8_t *frev, uint8_t *crev, +			    uint16_t *data_start)  {  	int offset = index * 2 + 4;  	int idx = CU16(ctx->data_table + offset); @@ -1408,8 +1408,8 @@ bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index,  	return true;  } -bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t * frev, -			   uint8_t * crev) +bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev, +			   uint8_t *crev)  {  	int offset = index * 2 + 4;  	int idx = CU16(ctx->cmd_table + offset); diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 8339c8c3a328..6134ed964027 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -171,7 +171,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode  	struct backlight_properties props;  	struct amdgpu_backlight_privdata *pdata;  	struct amdgpu_encoder_atom_dig *dig; -	u8 backlight_level;  	char bl_name[16];  	/* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -207,8 +206,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode  	pdata->encoder = amdgpu_encoder; -	backlight_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); -  	dig = amdgpu_encoder->enc_priv;  	
dig->bl_dev = bd; @@ -499,10 +496,8 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)  		} else {  			return ATOM_ENCODER_MODE_DVI;  		} -		break;  	case DRM_MODE_CONNECTOR_LVDS:  		return ATOM_ENCODER_MODE_LVDS; -		break;  	case DRM_MODE_CONNECTOR_DisplayPort:  		dig_connector = amdgpu_connector->con_priv;  		if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || @@ -519,20 +514,16 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder)  		} else {  			return ATOM_ENCODER_MODE_DVI;  		} -		break;  	case DRM_MODE_CONNECTOR_eDP:  		return ATOM_ENCODER_MODE_DP;  	case DRM_MODE_CONNECTOR_DVIA:  	case DRM_MODE_CONNECTOR_VGA:  		return ATOM_ENCODER_MODE_CRT; -		break;  	case DRM_MODE_CONNECTOR_Composite:  	case DRM_MODE_CONNECTOR_SVIDEO:  	case DRM_MODE_CONNECTOR_9PinDIN:  		/* fix me */  		return ATOM_ENCODER_MODE_TV; -		/*return ATOM_ENCODER_MODE_CV;*/ -		break;  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 09a538465ffd..af0335535f82 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -159,7 +159,7 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)  	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;  } -void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data) +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)  {  	PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;  	int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 03ff8bd1fee8..13737b317f7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1337,9 +1337,7 @@ cik_asic_reset_method(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_BONAIRE:  	case CHIP_HAWAII: -		/* disable baco reset until it works */ -		/* smu7_asic_get_baco_capability(adev, &baco_reset); */ -		baco_reset = false; +		baco_reset = cik_asic_supports_baco(adev);  		break;  	default:  		baco_reset = false; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index db953e95f3d2..d3745711d55f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -177,6 +177,7 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev)   * cik_ih_get_wptr - get the IH ring buffer wptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr   *   * Get the IH ring buffer wptr from either the register   * or the writeback memory buffer (CIK).  Also check for @@ -266,6 +267,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,   * cik_ih_set_rptr - set the IH ring buffer rptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set wptr   *   * Set the IH ring buffer rptr.   
 */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 20f108818b2b..43b978144b79 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 
 	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
-		       	(lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+	       (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
 }
 
 static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -215,7 +215,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
  *
  * @ring: amdgpu ring pointer
+ * @job: job to retrieve vmid from
  * @ib: IB object to schedule
+ * @flags: unused
  *
  * Schedule an IB in the DMA ring (CIK).
  */
@@ -267,7 +269,9 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
  * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
  *
  * @ring: amdgpu ring pointer
- * @fence: amdgpu fence object
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
@@ -655,6 +659,7 @@ error_free_wb:
  * cik_sdma_ring_test_ib - test an IB on the DMA engine
  *
  * @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  *
  * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
@@ -801,6 +806,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
 /**
  * cik_sdma_vm_pad_ib - pad the IB to the required number of dw
  *
+ * @ring: amdgpu_ring structure holding ring information
  * @ib: indirect buffer to fill with padding
  *
  */
@@ -849,7 +855,8 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
  * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
- * @vm: amdgpu_vm pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
  *
  * Update the page table base and flush the VM TLB
 * using sDMA (CIK). 
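Most of the cik_sdma.c hunks above follow a single pattern: adding the @param lines that kernel-doc requires for every function argument. Assembled from the context and additions above, the test-IB helper's header reads as one complete kernel-doc block after the patch:

	/**
	 * cik_sdma_ring_test_ib - test an IB on the DMA engine
	 *
	 * @ring: amdgpu_ring structure holding ring information
	 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
	 *
	 * Test a simple IB in the DMA ring (CIK).
	 * Returns 0 on success, error on failure.
	 */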
@@ -1071,22 +1078,19 @@ static int cik_sdma_soft_reset(void *handle)  {  	u32 srbm_soft_reset = 0;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	u32 tmp = RREG32(mmSRBM_STATUS2); +	u32 tmp; -	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { -		/* sdma0 */ -		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); -		tmp |= SDMA0_F32_CNTL__HALT_MASK; -		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); -		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; -	} -	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { -		/* sdma1 */ -		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); -		tmp |= SDMA0_F32_CNTL__HALT_MASK; -		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); -		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; -	} +	/* sdma0 */ +	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); +	tmp |= SDMA0_F32_CNTL__HALT_MASK; +	WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); +	srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; + +	/* sdma1 */ +	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); +	tmp |= SDMA0_F32_CNTL__HALT_MASK; +	WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); +	srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;  	if (srbm_soft_reset) {  		tmp = RREG32(mmSRBM_SOFT_RESET); @@ -1301,10 +1305,11 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)  /**   * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation   *   * Copy GPU buffers using the DMA engine (CIK).   * Used by the amdgpu ttm implementation to move pages if @@ -1328,7 +1333,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 1dca0cabc326..da37f8a900af 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -179,6 +179,7 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev)   * cz_ih_get_wptr - get the IH ring buffer wptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr   *   * Get the IH ring buffer wptr from either the register   * or the writeback memory buffer (VI).  Also check for @@ -213,6 +214,8 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,   * cz_ih_decode_iv - decode an interrupt vector   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into   *   * Decodes the interrupt vector at the current rptr   * position and also advance the position. @@ -245,6 +248,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,   * cz_ih_set_rptr - set the IH ring buffer rptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr   *   * Set the IH ring buffer rptr.   
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5963cbe0d455..7944781e1086 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -228,6 +228,7 @@ static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @crtc_id: crtc to cleanup pageflip on   * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip   *   * Triggers the actual pageflip by updating the primary   * surface base address. @@ -2202,22 +2203,18 @@ static int dce_v10_0_pick_dig_encoder(struct drm_encoder *encoder)  			return 1;  		else  			return 0; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:  		if (dig->linkb)  			return 3;  		else  			return 2; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:  		if (dig->linkb)  			return 5;  		else  			return 4; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:  		return 6; -		break;  	default:  		DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);  		return 0; @@ -2677,7 +2674,7 @@ static int dce_v10_0_crtc_set_base_atomic(struct drm_crtc *crtc,  					 struct drm_framebuffer *fb,  					 int x, int y, enum mode_set_atomic state)  { -       return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1); +	return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1);  }  static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 1954472c8e8f..1b6ff0470011 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -246,6 +246,7 @@ static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @crtc_id: crtc to cleanup pageflip on   * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip   *   * Triggers the actual pageflip by updating the primary   * surface base address. 
@@ -2235,22 +2236,18 @@ static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder)  			return 1;  		else  			return 0; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:  		if (dig->linkb)  			return 3;  		else  			return 2; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:  		if (dig->linkb)  			return 5;  		else  			return 4; -		break;  	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:  		return 6; -		break;  	default:  		DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);  		return 0; @@ -2304,19 +2301,16 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc)  				return ATOM_COMBOPHY_PLL1;  			else  				return ATOM_COMBOPHY_PLL0; -			break;  		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:  			if (dig->linkb)  				return ATOM_COMBOPHY_PLL3;  			else  				return ATOM_COMBOPHY_PLL2; -			break;  		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:  			if (dig->linkb)  				return ATOM_COMBOPHY_PLL5;  			else  				return ATOM_COMBOPHY_PLL4; -			break;  		default:  			DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);  			return ATOM_PPLL_INVALID; @@ -2785,7 +2779,7 @@ static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc,  					 struct drm_framebuffer *fb,  					 int x, int y, enum mode_set_atomic state)  { -       return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1); +	return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1);  }  static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3a44753a80d1..83a88385b762 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -180,6 +180,7 @@ static void dce_v6_0_pageflip_interrupt_fini(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @crtc_id: crtc to cleanup pageflip on   * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip   *   * Does the actual pageflip (evergreen+).   * During vblank we take the crtc lock and wait for the update_pending @@ -1047,7 +1048,6 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev,  /** - *   * dce_v6_0_bandwidth_update - program display watermarks   *   * @adev: amdgpu_device pointer @@ -2567,7 +2567,7 @@ static int dce_v6_0_crtc_set_base_atomic(struct drm_crtc *crtc,  					 struct drm_framebuffer *fb,  					 int x, int y, enum mode_set_atomic state)  { -       return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1); +	return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1);  }  static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 3603e5f13077..224b30214427 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -176,6 +176,7 @@ static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @crtc_id: crtc to cleanup pageflip on   * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip   *   * Triggers the actual pageflip by updating the primary   * surface base address. 
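The dce_v10_0 and dce_v11_0 hunks above delete break statements that sit after an unconditional return; such a break can never execute, and newer compilers warn about the unreachable code. Condensed, the cleaned-up switch takes this shape (a sketch using the encoder IDs from the hunks):

	switch (amdgpu_encoder->encoder_id) {
	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
		/* every case exits via return, so no break is needed */
		return dig->linkb ? 1 : 0;
	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
		return 6;
	default:
		DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id);
		return 0;
	}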
@@ -2498,7 +2499,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc)  	case ATOM_PPLL2:  		/* disable the ppll */  		amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id, -                                                 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); +						 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);  		break;  	case ATOM_PPLL0:  		/* disable the ppll */ @@ -2585,7 +2586,7 @@ static int dce_v8_0_crtc_set_base_atomic(struct drm_crtc *crtc,  					 struct drm_framebuffer *fb,  					 int x, int y, enum mode_set_atomic state)  { -       return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1); +	return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1);  }  static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b4d4b76538d2..ffcc64ec6473 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -139,9 +139,6 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)  	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);  	unsigned type; -	if (amdgpu_sriov_vf(adev)) -		return; -  	switch (mode) {  	case DRM_MODE_DPMS_ON:  		amdgpu_crtc->enabled = true; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6aca1c08068..2d01ac0d4c11 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -41,7 +41,7 @@ static void df_v1_7_sw_fini(struct amdgpu_device *adev)  }  static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, -                                          bool enable) +					  bool enable)  {  	u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 7b89fd2aa44a..6b4b30a8dce5 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -30,71 +30,17 @@  #define DF_3_6_SMN_REG_INST_DIST        0x8  #define DF_3_6_INST_CNT                 8 -static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, -				       16, 32, 0, 0, 0, 2, 4, 8}; - -/* init df format attrs */ -AMDGPU_PMU_ATTR(event,		"config:0-7"); -AMDGPU_PMU_ATTR(instance,	"config:8-15"); -AMDGPU_PMU_ATTR(umask,		"config:16-23"); - -/* df format attributes  */ -static struct attribute *df_v3_6_format_attrs[] = { -	&pmu_attr_event.attr, -	&pmu_attr_instance.attr, -	&pmu_attr_umask.attr, -	NULL -}; - -/* df format attribute group */ -static struct attribute_group df_v3_6_format_attr_group = { -	.name = "format", -	.attrs = df_v3_6_format_attrs, -}; - -/* df event attrs */ -AMDGPU_PMU_ATTR(cake0_pcsout_txdata, -		      "event=0x7,instance=0x46,umask=0x2"); -AMDGPU_PMU_ATTR(cake1_pcsout_txdata, -		      "event=0x7,instance=0x47,umask=0x2"); -AMDGPU_PMU_ATTR(cake0_pcsout_txmeta, -		      "event=0x7,instance=0x46,umask=0x4"); -AMDGPU_PMU_ATTR(cake1_pcsout_txmeta, -		      "event=0x7,instance=0x47,umask=0x4"); -AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc, -		      "event=0xb,instance=0x46,umask=0x4"); -AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc, -		      "event=0xb,instance=0x47,umask=0x4"); -AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc, -		      "event=0xb,instance=0x46,umask=0x8"); -AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc, -		      "event=0xb,instance=0x47,umask=0x8"); - -/* df event attributes  */ -static struct attribute *df_v3_6_event_attrs[] = { -	&pmu_attr_cake0_pcsout_txdata.attr, -	&pmu_attr_cake1_pcsout_txdata.attr, -	
&pmu_attr_cake0_pcsout_txmeta.attr, -	&pmu_attr_cake1_pcsout_txmeta.attr, -	&pmu_attr_cake0_ftiinstat_reqalloc.attr, -	&pmu_attr_cake1_ftiinstat_reqalloc.attr, -	&pmu_attr_cake0_ftiinstat_rspalloc.attr, -	&pmu_attr_cake1_ftiinstat_rspalloc.attr, -	NULL -}; +/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ +#define DF_V3_6_MAX_COUNTERS		4 -/* df event attribute group */ -static struct attribute_group df_v3_6_event_attr_group = { -	.name = "events", -	.attrs = df_v3_6_event_attrs -}; +/* get flags from df perfmon config */ +#define DF_V3_6_GET_EVENT(x)		(x & 0xFFUL) +#define DF_V3_6_GET_INSTANCE(x)		((x >> 8) & 0xFFUL) +#define DF_V3_6_GET_UNITMASK(x)		((x >> 16) & 0xFFUL) +#define DF_V3_6_PERFMON_OVERFLOW	0xFFFFFFFFFFFFULL -/* df event attr groups  */ -const struct attribute_group *df_v3_6_attr_groups[] = { -		&df_v3_6_format_attr_group, -		&df_v3_6_event_attr_group, -		NULL -}; +static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, +				       16, 32, 0, 0, 0, 2, 4, 8};  static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,  				 uint32_t ficaa_val) @@ -391,33 +337,28 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,  }  /* check whether the given counter is assigned to the df perfmon config */ -static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, -				      uint64_t config) +static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev, +				      uint64_t config, +				      int counter_idx)  { -	int i; -	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { -		if ((config & 0x0FFFFFFUL) == -					adev->df_perfmon_config_assign_mask[i]) -			return i; -	} +	return ((config & 0x0FFFFFFUL) == +			adev->df_perfmon_config_assign_mask[counter_idx]); -	return -EINVAL;  }  /* get address based on counter assignment */  static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,  				 uint64_t config, +				 int counter_idx,  				 int is_ctrl,  				 uint32_t *lo_base_addr,  				 uint32_t *hi_base_addr)  { -	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - -	if (target_cntr < 0) +	if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))  		return; -	switch (target_cntr) { +	switch (counter_idx) {  	case 0:  		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4; @@ -443,15 +384,18 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,  /* get read counter address */  static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,  					  uint64_t config, +					  int counter_idx,  					  uint32_t *lo_base_addr,  					  uint32_t *hi_base_addr)  { -	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); +	df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr, +								hi_base_addr);  }  /* get control counter settings i.e. address and values to set */  static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  					  uint64_t config, +					  int counter_idx,  					  uint32_t *lo_base_addr,  					  uint32_t *hi_base_addr,  					  uint32_t *lo_val, @@ -462,7 +406,8 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  	uint32_t eventsel, instance, unitmask;  	uint32_t instance_10, instance_5432, instance_76; -	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); +	df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr, +				hi_base_addr);  	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {  		DRM_ERROR("[DF PMC] addressing not retrieved! 
Lo: %x, Hi: %x", @@ -492,18 +437,13 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,  				   uint64_t config)  { -	int i, target_cntr; - -	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - -	if (target_cntr >= 0) -		return 0; +	int i;  	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {  		if (adev->df_perfmon_config_assign_mask[i] == 0U) {  			adev->df_perfmon_config_assign_mask[i] =  							config & 0x0FFFFFFUL; -			return 0; +			return i;  		}  	} @@ -512,59 +452,50 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,  #define DEFERRED_ARM_MASK	(1 << 31)  static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, -				    uint64_t config, bool is_deferred) +				    int counter_idx, uint64_t config, +				    bool is_deferred)  { -	int target_cntr; -	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - -	if (target_cntr < 0) +	if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))  		return -EINVAL;  	if (is_deferred) -		adev->df_perfmon_config_assign_mask[target_cntr] |= +		adev->df_perfmon_config_assign_mask[counter_idx] |=  							DEFERRED_ARM_MASK;  	else -		adev->df_perfmon_config_assign_mask[target_cntr] &= +		adev->df_perfmon_config_assign_mask[counter_idx] &=  							~DEFERRED_ARM_MASK;  	return 0;  }  static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev, +				    int counter_idx,  				    uint64_t config)  { -	int target_cntr; - -	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - -	/* -	 * we never get target_cntr < 0 since this funciton is only called in -	 * pmc_count for now but we should check anyways. -	 */ -	return (target_cntr >= 0 && -			(adev->df_perfmon_config_assign_mask[target_cntr] -			& DEFERRED_ARM_MASK)); +	return	(df_v3_6_pmc_has_counter(adev, config, counter_idx) && +			(adev->df_perfmon_config_assign_mask[counter_idx] +				& DEFERRED_ARM_MASK));  }  /* release performance counter */  static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, -				     uint64_t config) +				     uint64_t config, +				     int counter_idx)  { -	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - -	if (target_cntr >= 0) -		adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL; +	if (df_v3_6_pmc_has_counter(adev, config, counter_idx)) +		adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;  }  static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, -					 uint64_t config) +					 uint64_t config, +					 int counter_idx)  {  	uint32_t lo_base_addr = 0, hi_base_addr = 0; -	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, +	df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr,  				      &hi_base_addr);  	if ((lo_base_addr == 0) || (hi_base_addr == 0)) @@ -573,21 +504,22 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,  	df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);  } +/* return available counter if is_add == 1 otherwise return error status. 
*/  static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, -			     int is_add) +			     int counter_idx, int is_add)  {  	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;  	int err = 0, ret = 0;  	switch (adev->asic_type) {  	case CHIP_VEGA20: +	case CHIP_ARCTURUS:  		if (is_add)  			return df_v3_6_pmc_add_cntr(adev, config); -		df_v3_6_reset_perfmon_cntr(adev, config); -  		ret = df_v3_6_pmc_get_ctrl_settings(adev,  					config, +					counter_idx,  					&lo_base_addr,  					&hi_base_addr,  					&lo_val, @@ -604,7 +536,8 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,  						     hi_val);  		if (err) -			ret = df_v3_6_pmc_set_deferred(adev, config, true); +			ret = df_v3_6_pmc_set_deferred(adev, config, +							counter_idx, true);  		break;  	default: @@ -615,15 +548,17 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,  }  static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, -			    int is_remove) +			    int counter_idx, int is_remove)  {  	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;  	int ret = 0;  	switch (adev->asic_type) {  	case CHIP_VEGA20: +	case CHIP_ARCTURUS:  		ret = df_v3_6_pmc_get_ctrl_settings(adev,  			config, +			counter_idx,  			&lo_base_addr,  			&hi_base_addr,  			&lo_val, @@ -635,8 +570,8 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,  		if (is_remove) { -			df_v3_6_reset_perfmon_cntr(adev, config); -			df_v3_6_pmc_release_cntr(adev, config); +			df_v3_6_reset_perfmon_cntr(adev, config, counter_idx); +			df_v3_6_pmc_release_cntr(adev, config, counter_idx);  		}  		break; @@ -649,6 +584,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,  static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,  				  uint64_t config, +				  int counter_idx,  				  uint64_t *count)  {  	uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0; @@ -656,14 +592,15 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,  	switch (adev->asic_type) {  	case CHIP_VEGA20: -		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, -				      &hi_base_addr); +	case CHIP_ARCTURUS: +		df_v3_6_pmc_get_read_settings(adev, config, counter_idx, +						&lo_base_addr, &hi_base_addr);  		if ((lo_base_addr == 0) || (hi_base_addr == 0))  			return;  		/* rearm the counter or throw away count value on failure */ -		if (df_v3_6_pmc_is_deferred(adev, config)) { +		if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {  			int rearm_err = df_v3_6_perfmon_arm_with_status(adev,  							lo_base_addr, lo_val,  							hi_base_addr, hi_val); @@ -671,7 +608,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,  			if (rearm_err)  				return; -			df_v3_6_pmc_set_deferred(adev, config, false); +			df_v3_6_pmc_set_deferred(adev, config, counter_idx, +									false);  		}  		df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index 76998541bc30..2505c7ef258a 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,15 +35,6 @@ enum DF_V3_6_MGCG {  	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15  }; -/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ -#define DF_V3_6_MAX_COUNTERS		4 - -/* get flags from df perfmon config */ -#define DF_V3_6_GET_EVENT(x)		(x & 0xFFUL) -#define DF_V3_6_GET_INSTANCE(x)		((x >> 8) & 0xFFUL) -#define DF_V3_6_GET_UNITMASK(x)		((x >> 16) & 0xFFUL) -#define 
DF_V3_6_PERFMON_OVERFLOW	0xFFFFFFFFFFFFULL -  extern const struct attribute_group *df_v3_6_attr_groups[];  extern const struct amdgpu_df_funcs df_v3_6_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c new file mode 100755 index 000000000000..e9f177e9e3cf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "dimgrey_cavefish_ip_offset.h" + +int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev) +{ +	/* HW has more IP blocks, only initialize the blocks needed by the driver */ +	uint32_t i; +	for (i = 0; i < MAX_INSTANCE; ++i) { +		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); +		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); +		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); +		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); +		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); +		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); +		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i])); +		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); +		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); +		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); +		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); +		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); +		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); +	} +	return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 56fdbe626d30..ba1086784525 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,23 @@  #define mmGCR_GENERAL_CNTL_Sienna_Cichlid			0x1580  #define 
mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX	0 +#define mmSPI_CONFIG_CNTL_1_Vangogh		 0x2441 +#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX	 1 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh          0x2261 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh_BASE_IDX 1 +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh           0x224f +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh_BASE_IDX  1 +#define mmVGT_TF_RING_SIZE_Vangogh               0x224e +#define mmVGT_TF_RING_SIZE_Vangogh_BASE_IDX      1 +#define mmVGT_GSVS_RING_SIZE_Vangogh             0x2241 +#define mmVGT_GSVS_RING_SIZE_Vangogh_BASE_IDX    1 +#define mmVGT_TF_MEMORY_BASE_Vangogh             0x2250 +#define mmVGT_TF_MEMORY_BASE_Vangogh_BASE_IDX    1 +#define mmVGT_ESGS_RING_SIZE_Vangogh             0x2240 +#define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX    1 +#define mmSPI_CONFIG_CNTL_Vangogh                0x2440 +#define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX       1 +  #define mmCP_HYP_PFP_UCODE_ADDR			0x5814  #define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX	1  #define mmCP_HYP_PFP_UCODE_DATA			0x5815 @@ -112,6 +129,13 @@  #define mmCP_HYP_ME_UCODE_DATA			0x5817  #define mmCP_HYP_ME_UCODE_DATA_BASE_IDX		1 +#define mmCPG_PSP_DEBUG				0x5c10 +#define mmCPG_PSP_DEBUG_BASE_IDX		1 +#define mmCPC_PSP_DEBUG				0x5c11 +#define mmCPC_PSP_DEBUG_BASE_IDX		1 +#define CPC_PSP_DEBUG__GPA_OVERRIDE_MASK	0x00000008L +#define CPG_PSP_DEBUG__GPA_OVERRIDE_MASK	0x00000008L +  //CC_GC_SA_UNIT_DISABLE  #define mmCC_GC_SA_UNIT_DISABLE                 0x0fe9  #define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX        0 @@ -128,6 +152,14 @@  #define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO__SHIFT 0x3  #define PA_SC_ENHANCE_3__FORCE_PBB_WORKLOAD_MODE_TO_ZERO_MASK   0x00000008L +#define mmCGTT_SPI_CS_CLK_CTRL			0x507c +#define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX         1 + +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid		0x16f3 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX	0 +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid          0x15db +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX	0 +  MODULE_FIRMWARE("amdgpu/navi10_ce.bin");  MODULE_FIRMWARE("amdgpu/navi10_pfp.bin");  MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -168,6 +200,20 @@ MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin");  MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin");  MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_ce.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_me.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin"); +  static const struct soc15_reg_golden golden_settings_gc_10_1[] =  {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1363,23 +1409,14 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)  {  	static void *scratch_reg0;  	static void *scratch_reg1; -	static void *scratch_reg2; -	static void *scratch_reg3;  	static void *spare_int; -	static uint32_t grbm_cntl; -	static uint32_t grbm_idx;  	uint32_t i = 0;  	uint32_t retries = 50000;  	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;  	scratch_reg1 = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; -	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; -	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;  	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; -	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; -	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; -  	if (amdgpu_sriov_runtime(adev)) {  		pr_err("shouldn't call rlcg write register during runtime\n");  		return; @@ -3094,6 +3131,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] =  static const struct soc15_reg_golden golden_settings_gc_10_3[] =  { +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), @@ -3101,7 +3139,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3140,6 +3182,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] =  static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =  { +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), @@ -3148,6 +3191,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3177,7 +3222,79 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] =  	SOC15_REG_GOLDEN_VALUE(GC, 
0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), -	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + +	/* This is not in GDB yet. Don't remove it. It fixes a GPU hang on Navy Flounder. */ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + +	/* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. 
*/ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = +{ +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00001d00, 0x00000500), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0x40000000, 0x580f1008), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00040000, 0x00f80988), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0x01000000, 0x01200007), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x0000001f, 0x00180070), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020)  };  #define DEFAULT_SH_MEM_CONFIG \ @@ -3192,7 +3309,7 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);  static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);  static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);  static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, -                                 struct amdgpu_cu_info *cu_info); +				 struct amdgpu_cu_info *cu_info);  static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);  static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,  				   u32 sh_num, u32 instance); @@ -3389,7 +3506,16 
@@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)  						golden_settings_gc_10_3_2,  						(const u32)ARRAY_SIZE(golden_settings_gc_10_3_2));  		break; - +	case CHIP_VANGOGH: +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_3_vangogh, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh)); +		break; +	case CHIP_DIMGREY_CAVEFISH: +		soc15_program_register_sequence(adev, +						golden_settings_gc_10_3_4, +						(const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); +		break;  	default:  		break;  	} @@ -3573,6 +3699,8 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfx.cp_fw_write_wait = true;  		break;  	default: @@ -3640,7 +3768,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)  		if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))  			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;  		break; -	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH:  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;  		break;  	default: @@ -3685,6 +3813,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)  	case CHIP_NAVY_FLOUNDER:  		chip_name = "navy_flounder";  		break; +	case CHIP_VANGOGH: +		chip_name = "vangogh"; +		break; +	case CHIP_DIMGREY_CAVEFISH: +		chip_name = "dimgrey_cavefish"; +		break;  	default:  		BUG();  	} @@ -4200,11 +4334,26 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,  }  static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, -									  u32 me, u32 pipe, u32 q, u32 vm) - { -       nv_grbm_select(adev, me, pipe, q, vm); - } +				       u32 me, u32 pipe, u32 q, u32 vm) +{ +	nv_grbm_select(adev, me, pipe, q, vm); +} + +static void gfx_v10_0_update_perfmon_mgcg(struct amdgpu_device *adev, +					  bool enable) +{ +	uint32_t data, def; + +	data = def = RREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL); + +	if (enable) +		data |= RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; +	else +		data &= ~RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; +	if (data != def) +		WREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL, data); +}  static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {  	.get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, @@ -4214,6 +4363,7 @@ static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = {  	.read_wave_vgprs = &gfx_v10_0_read_wave_vgprs,  	.select_me_pipe_q = &gfx_v10_0_select_me_pipe_q,  	.init_spm_golden = &gfx_v10_0_init_spm_golden_registers, +	.update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg,  };  static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) @@ -4235,6 +4385,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfx.config.max_hw_contexts = 8;  		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;  		adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4328,7 +4480,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,  	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP  		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)  		+ ring->pipe; -	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? +	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, +							    ring->queue) ?  			
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;  /* type-2 packets are deprecated on MEC, use type-3 instead */  	r = amdgpu_ring_init(adev, ring, 1024, @@ -4358,6 +4511,8 @@ static int gfx_v10_0_sw_init(void *handle)  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfx.me.num_me = 1;  		adev->gfx.me.num_pipe_per_me = 1;  		adev->gfx.me.num_queue_per_pipe = 1; @@ -4617,8 +4772,7 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade  	/* for ASICs that integrate GFX v10.3  	 * pa_sc_tile_steering_override should be set to 0 */ -	if (adev->asic_type == CHIP_SIENNA_CICHLID || -	    adev->asic_type == CHIP_NAVY_FLOUNDER) +	if (adev->asic_type >= CHIP_SIENNA_CICHLID)  		return 0;  	/* init num_sc */ @@ -4690,7 +4844,7 @@ static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)  	 * the driver can enable them for graphics. VMID0 should maintain  	 * access so that HWS firmware can save/restore entries.  	 */ -	for (vmid = 1; vmid < 16; vmid++) { +	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -4860,7 +5014,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)  	return 0;  } -void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)  {  	u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -5841,20 +5995,24 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,  {  	u32 tmp; -	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); -	if (ring->use_doorbell) { -		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, -				    DOORBELL_OFFSET, ring->doorbell_index); -		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, -				    DOORBELL_EN, 1); -	} else { -		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, -				    DOORBELL_EN, 0); +	if (!amdgpu_async_gfx_ring) { +		tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); +		if (ring->use_doorbell) { +			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, +						DOORBELL_OFFSET, ring->doorbell_index); +			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, +						DOORBELL_EN, 1); +		} else { +			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, +						DOORBELL_EN, 0); +		} +		WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);  	} -	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,  				    DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index);  		WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -5988,6 +6146,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)  		switch (adev->asic_type) {  		case CHIP_SIENNA_CICHLID:  		case CHIP_NAVY_FLOUNDER: +		case CHIP_VANGOGH: +		case CHIP_DIMGREY_CAVEFISH:  			WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);  			break;  		default: @@ -5998,6 +6158,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)  		switch (adev->asic_type) {  		case CHIP_SIENNA_CICHLID:  		case CHIP_NAVY_FLOUNDER: +		case CHIP_VANGOGH: +		case CHIP_DIMGREY_CAVEFISH:  			WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid,  				     (CP_MEC_CNTL__MEC_ME1_HALT_MASK |  				      CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 
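Many of the gfx_v10_0 hunks in this range extend per-ASIC switch statements so that the GFX10.3 parts (Sienna Cichlid, Navy Flounder, Vangogh, Dimgrey Cavefish) are handled together through the relocated _Sienna_Cichlid register aliases. A condensed sketch of that dispatch pattern, using names from the cp_compute_enable hunk above:

	switch (adev->asic_type) {
	case CHIP_SIENNA_CICHLID:
	case CHIP_NAVY_FLOUNDER:
	case CHIP_VANGOGH:
	case CHIP_DIMGREY_CAVEFISH:
		/* GFX10.3 parts expose the register at a new offset, so all
		 * of them share the _Sienna_Cichlid alias */
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0);
		break;
	default:
		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
		break;
	}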
@@ -6092,6 +6254,8 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);  		tmp &= 0xffffff00;  		tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -6187,6 +6351,11 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring)  				    DOORBELL_EN, 0);  	mqd->cp_rb_doorbell_control = tmp; +	/* if there are 2 gfx rings, set the lower doorbell range of the first ring, +	 * otherwise the range of the second ring will override the first ring */ +	if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) +		gfx_v10_0_cp_gfx_set_doorbell(adev, ring); +  	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */  	ring->wptr = 0;  	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); @@ -6354,7 +6523,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct  	struct amdgpu_device *adev = ring->adev;  	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { -		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { +		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, +							      ring->queue)) {  			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;  			mqd->cp_hqd_queue_priority =  				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -6800,6 +6970,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid);  		WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0);  		WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -6812,6 +6983,8 @@ 			return false;  		}  		break; +	case CHIP_VANGOGH: +		return true;  	default:  		data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE);  		WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); @@ -6839,6 +7012,8 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		/* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */  		data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) <<  			GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -6954,6 +7129,18 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev)  	WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data);  } +static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev) +{ +	uint32_t data; +	data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG); +	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; +	WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data); + +	data = RREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG); +	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; +	WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data); +} +  static int gfx_v10_0_hw_init(void *handle)  {  	int r; @@ -6968,7 +7155,7 @@ static int gfx_v10_0_hw_init(void *handle)  		 * loaded first, so in direct type, it has to load smc ucode  		 * here before rlc.  		 
*/ -		if (adev->smu.ppt_funcs != NULL) { +		if (adev->smu.ppt_funcs != NULL && !(adev->flags & AMD_IS_APU)) {  			r = smu_load_microcode(&adev->smu);  			if (r)  				return r; @@ -6979,6 +7166,7 @@ static int gfx_v10_0_hw_init(void *handle)  				return r;  			}  		} +		gfx_v10_0_disable_gpa_mode(adev);  	}  	/* if GRBM CAM not remapped, set up the remapping */ @@ -7136,6 +7324,8 @@ static int gfx_v10_0_soft_reset(void *handle)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid))  			grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,  							GRBM_SOFT_RESET, @@ -7235,13 +7425,16 @@ static int gfx_v10_0_early_init(void *handle)  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid;  		break;  	default:  		break;  	} -	adev->gfx.num_compute_rings = amdgpu_num_kcq; +	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), +					  AMDGPU_MAX_COMPUTE_RINGS);  	gfx_v10_0_set_kiq_pm4_funcs(adev);  	gfx_v10_0_set_ring_funcs(adev); @@ -7288,6 +7481,8 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);  		/* wait for RLC_SAFE_MODE */ @@ -7320,6 +7515,8 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data);  		break;  	default: @@ -7344,6 +7541,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade  		/* 1 - RLC_CGTT_MGCG_OVERRIDE */  		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);  		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | +			  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |  			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |  			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |  			  RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); @@ -7480,12 +7678,50 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade  	}  } +static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, +						      bool enable) +{ +	uint32_t def, data; + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { +		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); +		/* unset FGCG override */ +		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; +		/* update FGCG override bits */ +		if (def != data) +			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + +		def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); +		/* unset RLC SRAM CLK GATER override */ +		data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; +		/* update RLC SRAM CLK GATER override bits */ +		if (def != data) +			WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); +	} else { +		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); +		/* reset FGCG bits */ +		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; +		/* disable FGCG*/ +		if (def != data) +			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + +		def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); +		/* reset RLC SRAM CLK GATER bits */ +		data |= 
RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; +		/* disable RLC SRAM CLK */ +		if (def != data) +			WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); +	} +} +  static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,  					    bool enable)  {  	amdgpu_gfx_rlc_enter_safe_mode(adev);  	if (enable) { +		/* enable FGCG first */ +		gfx_v10_0_update_fine_grain_clock_gating(adev, enable);  		/* CGCG/CGLS should be enabled after MGCG/MGLS  		 * ===  MGCG + MGLS ===  		 */ @@ -7503,6 +7739,8 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,  		gfx_v10_0_update_3d_clock_gating(adev, enable);  		/* ===  MGCG + MGLS === */  		gfx_v10_0_update_medium_grain_clock_gating(adev, enable); +		/* disable FGCG last */ +		gfx_v10_0_update_fine_grain_clock_gating(adev, enable);  	}  	if (adev->cg_flags & @@ -7564,6 +7802,27 @@ static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offse  	return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);  } +static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ +	u32 data = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); + +	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) +		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; +	else +		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + +	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, data); +} + +static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable) +{ +	amdgpu_gfx_rlc_enter_safe_mode(adev); + +	gfx_v10_cntl_power_gating(adev, enable); + +	amdgpu_gfx_rlc_exit_safe_mode(adev); +} +  static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {  	.is_rlc_enabled = gfx_v10_0_is_rlc_enabled,  	.set_safe_mode = gfx_v10_0_set_safe_mode, @@ -7609,8 +7868,12 @@ static int gfx_v10_0_set_powergating_state(void *handle,  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		amdgpu_gfx_off_ctrl(adev, enable);  		break; +	case CHIP_VANGOGH: +		gfx_v10_cntl_pg(adev, enable); +		break;  	default:  		break;  	} @@ -7631,6 +7894,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle,  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		gfx_v10_0_update_gfx_clock_gating(adev,  						 state == AMD_CG_STATE_GATE);  		break; @@ -7645,6 +7910,11 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	int data; +	/* AMD_CG_SUPPORT_GFX_FGCG */ +	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); +	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) +		*flags |= AMD_CG_SUPPORT_GFX_FGCG; +  	/* AMD_CG_SUPPORT_GFX_MGCG */  	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));  	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) @@ -8394,6 +8664,7 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev,  		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,  			       PRIV_INSTR_INT_ENABLE,  			       state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); +		break;  	default:  		break;  	} @@ -8733,6 +9004,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev)  	case CHIP_NAVI14:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs;  		break;  	case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 79c52c7a02e3..ca74638dec9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1894,6 +1894,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,   * gfx_v6_0_ring_test_ib - basic ring IB test   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Allocate an IB and execute it on the gfx ring (SI).   * Provides a basic gfx ring test to verify that IBs are working. @@ -3064,7 +3065,8 @@ static int gfx_v6_0_early_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; -	adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; +	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), +					  GFX6_NUM_COMPUTE_RINGS);  	adev->gfx.funcs = &gfx_v6_0_gfx_funcs;  	adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs;  	gfx_v6_0_set_ring_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 04eaf3a8fddb..a368724c3dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1580,10 +1580,10 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @se_num: shader engine to address   * @sh_num: sh block to address + * @instance: Certain registers are instanced per SE or SH. + *            0xffffffff means broadcast to all SEs or SHs (CIK).   * - * Select which SE, SH combinations to address. Certain - * registers are instanced per SE or SH.  0xffffffff means - * broadcast to all SEs or SHs (CIK). + * Select which SE, SH combinations to address.   */  static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,  				  u32 se_num, u32 sh_num, u32 instance) @@ -1779,8 +1779,6 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,   * gfx_v7_0_setup_rb - setup the RBs on the asic   *   * @adev: amdgpu_device pointer - * @se_num: number of SEs (shader engines) for the asic - * @sh_per_se: number of SH blocks per SE for the asic   *   * Configures per-SE/SH RB registers (CIK).   */ @@ -1841,6 +1839,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)  	mutex_unlock(&adev->grbm_idx_mutex);  } +#define DEFAULT_SH_MEM_BASES	(0x6000)  /**   * gfx_v7_0_init_compute_vmid - init compute vmid   * @@ -1849,7 +1848,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)   * Initialize compute vmid sh_mem registers   *   */ -#define DEFAULT_SH_MEM_BASES	(0x6000)  static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)  {  	int i; @@ -1898,7 +1896,7 @@ static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)  	 * the driver can enable them for graphics. VMID0 should maintain  	 * access so that HWS firmware can save/restore entries.  	 
*/ -	for (vmid = 1; vmid < 16; vmid++) { +	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {  		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);  		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);  		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -2074,7 +2072,6 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)  /**   * gfx_v7_0_ring_test_ring - basic gfx ring test   * - * @adev: amdgpu_device pointer   * @ring: amdgpu_ring structure holding ring information   *   * Allocate a scratch register and write to it using the gfx ring (CIK). @@ -2121,8 +2118,7 @@ error_free_scratch:  /**   * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp   * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index + * @ring: amdgpu_ring structure holding ring information   *   * Emits an hdp flush on the cp.   */ @@ -2171,8 +2167,10 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)  /**   * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring   * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Emits a fence sequence number on the gfx ring and flushes   * GPU caches. @@ -2212,8 +2210,10 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,  /**   * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring   * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Emits a fence sequence number on the compute ring and flushes   * GPU caches. @@ -2245,7 +2245,9 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,   * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring   *   * @ring: amdgpu_ring structure holding ring information + * @job: job to retrieve vmid from   * @ib: amdgpu indirect buffer object + * @flags: options (AMDGPU_HAVE_CTX_SWITCH)   *   * Emits a DE (drawing engine) or CE (constant engine) IB   * on the gfx ring.  IBs are usually generated by userspace @@ -2342,6 +2344,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)   * gfx_v7_0_ring_test_ib - basic ring IB test   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Allocate an IB and execute it on the gfx ring (CIK).   * Provides a basic gfx ring test to verify that IBs are working. @@ -3234,7 +3237,9 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)  /**   * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP   * - * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using the CP (CIK). 
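Most of the kernel-doc hunks in the gfx_v6_0 and gfx_v7_0 files exist to make the comment blocks match the real function signatures: one @name: line per actual parameter, with stale entries such as @adev or @fence dropped. Assembled from the hunks above, a representative block ends up looking like this:

	/**
	 * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
	 *
	 * @ring: amdgpu_ring structure holding ring information
	 * @addr: address
	 * @seq: sequence number
	 * @flags: fence related flags
	 *
	 * Emits a fence sequence number on the gfx ring and flushes
	 * GPU caches.
	 */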
@@ -4238,7 +4243,8 @@ static int gfx_v7_0_early_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; -	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; +	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), +					  AMDGPU_MAX_COMPUTE_RINGS);  	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;  	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;  	gfx_v7_0_set_ring_funcs(adev); @@ -5207,15 +5213,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)  	cu_info->lds_size = 64;  } -static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = -{ -	.type = AMD_IP_BLOCK_TYPE_GFX, -	.major = 7, -	.minor = 0, -	.rev = 0, -	.funcs = &gfx_v7_0_ip_funcs, -}; -  const struct amdgpu_ip_block_version gfx_v7_1_ip_block =  {  	.type = AMD_IP_BLOCK_TYPE_GFX, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 6fb9c1524691..eedce7d007f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -24,7 +24,6 @@  #ifndef __GFX_V7_0_H__  #define __GFX_V7_0_H__ -extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block;  extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;  extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;  extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 94b7e0531d09..37639214cbbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -729,8 +729,13 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);  static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);  static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); +#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL +#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L +  static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)  { +	uint32_t data; +  	switch (adev->asic_type) {  	case CHIP_TOPAZ:  		amdgpu_device_program_register_sequence(adev, @@ -790,11 +795,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)  		amdgpu_device_program_register_sequence(adev,  							polaris10_golden_common_all,  							ARRAY_SIZE(polaris10_golden_common_all)); -		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); -		if (adev->pdev->revision == 0xc7 && +		data = RREG32_SMC(ixCG_ACLK_CNTL); +		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK; +		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT; +		WREG32_SMC(ixCG_ACLK_CNTL, data); +		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&  		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||  		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || -		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { +		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {  			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);  			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);  		} @@ -1915,7 +1923,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,  		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)  		+ ring->pipe; -	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? 
+	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, +							    ring->queue) ?  			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;  	/* type-2 packets are deprecated on MEC, use type-3 instead */  	r = amdgpu_ring_init(adev, ring, 1024, @@ -3678,6 +3687,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)  	mutex_unlock(&adev->grbm_idx_mutex);  } +#define DEFAULT_SH_MEM_BASES	(0x6000)  /**   * gfx_v8_0_init_compute_vmid - gart enable   * @@ -3686,7 +3696,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)   * Initialize compute vmid sh_mem registers   *   */ -#define DEFAULT_SH_MEM_BASES	(0x6000)  static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)  {  	int i; @@ -3740,7 +3749,7 @@ static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)  	 * the driver can enable them for graphics. VMID0 should maintain  	 * access so that HWS firmware can save/restore entries.  	 */ -	for (vmid = 1; vmid < 16; vmid++) { +	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {  		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);  		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);  		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -4433,7 +4442,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m  	struct amdgpu_device *adev = ring->adev;  	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { -		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { +		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, +							      ring->queue)) {  			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;  			mqd->cp_hqd_queue_priority =  				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -5058,7 +5068,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle)  		gfx_v8_0_cp_compute_enable(adev, false);  	} -       return 0; +	return 0;  }  static int gfx_v8_0_soft_reset(void *handle) @@ -5295,7 +5305,8 @@ static int gfx_v8_0_early_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; -	adev->gfx.num_compute_rings = amdgpu_num_kcq; +	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), +					  AMDGPU_MAX_COMPUTE_RINGS);  	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;  	gfx_v8_0_set_ring_funcs(adev);  	gfx_v8_0_set_irq_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6959aebae6d4..5f4805e4d04a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -117,6 +117,13 @@ MODULE_FIRMWARE("amdgpu/renoir_mec.bin");  MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");  MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); +  #define mmTCP_CHAN_STEER_0_ARCT								0x0b03  #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0  #define mmTCP_CHAN_STEER_1_ARCT								0x0b04 @@ -787,7 +794,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);  static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);  static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);  static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, -                                 struct amdgpu_cu_info *cu_info); +				struct amdgpu_cu_info *cu_info);  static uint64_t 
gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);  static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);  static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); @@ -1630,14 +1637,17 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  		chip_name = "arcturus";  		break;  	case CHIP_RENOIR: -		chip_name = "renoir"; +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			chip_name = "renoir"; +		else +			chip_name = "green_sardine";  		break;  	default:  		BUG();  	}  	/* No CPG in Arcturus */ -	if (adev->asic_type != CHIP_ARCTURUS) { +	if (adev->gfx.num_gfx_rings) {  		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);  		if (r)  			return r; @@ -2218,7 +2228,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,  	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP  		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)  		+ ring->pipe; -	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? +	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, +							    ring->queue) ?  			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;  	/* type-2 packets are deprecated on MEC, use type-3 instead */  	return amdgpu_ring_init(adev, ring, 1024, @@ -2509,7 +2520,7 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)  	 * the driver can enable them for graphics. VMID0 should maintain  	 * access so that HWS firmware can save/restore entries.  	 */ -	for (vmid = 1; vmid < 16; vmid++) { +	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);  		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -2622,7 +2633,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)  static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,  					       bool enable)  { -	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); +	u32 tmp; + +	/* don't toggle interrupts that are only applicable +	 * to me0 pipe0 on ASICs that have me0 removed */ +	if (!adev->gfx.num_gfx_rings) +		return; + +	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);  	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);  	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); @@ -2982,7 +3000,7 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)  	}  } -void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)  {  	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);  	gfx_v9_0_enable_gui_idle_interrupt(adev, false); @@ -3373,7 +3391,9 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m  	struct amdgpu_device *adev = ring->adev;  	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { -		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { +		if (amdgpu_gfx_is_high_priority_compute_queue(adev, +							      ring->pipe, +							      ring->queue)) {  			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;  			mqd->cp_hqd_queue_priority =  				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -3809,7 +3829,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  		gfx_v9_0_enable_gui_idle_interrupt(adev, false);  	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { -		if (adev->asic_type != CHIP_ARCTURUS) { +		if (adev->gfx.num_gfx_rings) {  			/* legacy firmware loading */  			r = gfx_v9_0_cp_gfx_load_microcode(adev);  			if (r) @@ -3825,7 +3845,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  	if (r)  		return r; -	if (adev->asic_type != CHIP_ARCTURUS) { +	if (adev->gfx.num_gfx_rings) {  		r = gfx_v9_0_cp_gfx_resume(adev);  		if (r)  			return r; @@ -3835,7 +3855,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)  	if (r)  		return r; -	if (adev->asic_type != CHIP_ARCTURUS) { +	if (adev->gfx.num_gfx_rings) {  		ring = &adev->gfx.gfx_ring[0];  		r = amdgpu_ring_test_helper(ring);  		if (r) @@ -3871,7 +3891,7 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)  static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)  { -	if (adev->asic_type != CHIP_ARCTURUS) +	if (adev->gfx.num_gfx_rings)  		gfx_v9_0_cp_gfx_enable(adev, enable);  	gfx_v9_0_cp_compute_enable(adev, enable);  } @@ -4012,7 +4032,7 @@ static int gfx_v9_0_soft_reset(void *handle)  		/* stop the rlc */  		adev->gfx.rlc.funcs->stop(adev); -		if (adev->asic_type != CHIP_ARCTURUS) +		if (adev->gfx.num_gfx_rings)  			/* Disable GFX parsing/prefetching */  			gfx_v9_0_cp_gfx_enable(adev, false); @@ -4623,7 +4643,8 @@ static int gfx_v9_0_early_init(void *handle)  		adev->gfx.num_gfx_rings = 0;  	else  		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; -	adev->gfx.num_compute_rings = amdgpu_num_kcq; +	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), +					  AMDGPU_MAX_COMPUTE_RINGS);  	gfx_v9_0_set_kiq_pm4_funcs(adev);  	gfx_v9_0_set_ring_funcs(adev);  	gfx_v9_0_set_irq_funcs(adev); @@ -5167,7 +5188,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)  	if (ring->use_doorbell) {  		/* XXX check if swapping is necessary on BE */ -		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); +		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);  		WDOORBELL64(ring->doorbell_index, ring->wptr);  	} else {  		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); @@ -5353,7 +5374,7 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)  	/* XXX check if swapping is necessary on BE */  	if (ring->use_doorbell) { -		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); +		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);  		WDOORBELL64(ring->doorbell_index, ring->wptr);  	} else{  		BUG(); /* only DOORBELL method supported on gfx9 
now */ @@ -5673,6 +5694,7 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,  		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,  			       PRIV_INSTR_INT_ENABLE,  			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); +		break;  	default:  		break;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index fad887a66886..6ddd53ba8b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -22,6 +22,7 @@   */  #include "amdgpu.h"  #include "gfxhub_v1_0.h" +#include "gfxhub_v1_1.h"  #include "gc/gc_9_0_offset.h"  #include "gc/gc_9_0_sh_mask.h" @@ -30,13 +31,14 @@  #include "soc15_common.h" -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) +static u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)  {  	return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24;  } -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, -				uint64_t page_table_base) +static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, +					 uint32_t vmid, +					 uint64_t page_table_base)  {  	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -274,7 +276,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev)  	}  } -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)  {  	if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) {  		/* @@ -304,7 +306,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)  	return 0;  } -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) +static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)  {  	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];  	u32 tmp; @@ -335,8 +337,8 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)   * @adev: amdgpu_device pointer   * @value: true redirects VM faults to the default page   */ -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, -					  bool value) +static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, +						 bool value)  {  	u32 tmp;  	tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -373,7 +375,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,  	WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);  } -void gfxhub_v1_0_init(struct amdgpu_device *adev) +static void gfxhub_v1_0_init(struct amdgpu_device *adev)  {  	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -412,4 +414,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {  	.gart_disable = gfxhub_v1_0_gart_disable,  	.set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default,  	.init = gfxhub_v1_0_init, +	.get_xgmi_info = gfxhub_v1_1_get_xgmi_info,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 0c46672bbf49..3174bc5766fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -24,14 +24,6 @@  #ifndef __GFXHUB_V1_0_H__  #define __GFXHUB_V1_0_H__ -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, -					  bool value); -void gfxhub_v1_0_init(struct amdgpu_device *adev); -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, -				uint64_t page_table_base); -  
extern const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs; +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 1e24b6d51e41..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -21,7 +21,6 @@   *   */  #include "amdgpu.h" -#include "gfxhub_v1_0.h"  #include "gfxhub_v1_1.h"  #include "gc/gc_9_2_1_offset.h" @@ -29,7 +28,7 @@  #include "soc15_common.h" -static int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)  {  	u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);  	u32 max_region = @@ -67,13 +66,3 @@ static int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)  	return 0;  } - -const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs = { -	.get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, -	.setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, -	.gart_enable = gfxhub_v1_0_gart_enable, -	.gart_disable = gfxhub_v1_0_gart_disable, -	.set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, -	.init = gfxhub_v1_0_init, -	.get_xgmi_info = gfxhub_v1_1_get_xgmi_info, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h index ae5759ffbee3..d753cf28a0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h @@ -24,6 +24,6 @@  #ifndef __GFXHUB_V1_1_H__  #define __GFXHUB_V1_1_H__ -extern const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs; +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 456360bf58fa..2aecc6a243e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -153,16 +153,16 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)  	uint64_t value;  	if (!amdgpu_sriov_vf(adev)) { -		/* Disable AGP. */ +		/* Program the AGP BAR */  		WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); -		WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); -		WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); +		WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); +		WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);  		/* Program the system aperture low logical page number. */  		WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, -			     adev->gmc.vram_start >> 18); +			     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);  		WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, -			     adev->gmc.vram_end >> 18); +			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);  		/* Set default page address. */  		value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 724bb29e9bb4..410fd3a1a388 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -152,16 +152,16 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)  {  	uint64_t value; -	/* Disable AGP. */ +	/* Program the AGP BAR */  	WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); -	WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); -	WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); +	WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); +	WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);  	/* Program the system aperture low logical page number. 
*/  WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, -		     adev->gmc.vram_start >> 18); +		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);  	WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, -		     adev->gmc.vram_end >> 18); +		     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);  	/* Set default page address. */  	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index dbc8b76b9b78..5648c48be77f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -46,6 +46,7 @@  #include "gfxhub_v2_0.h"  #include "gfxhub_v2_1.h"  #include "mmhub_v2_0.h" +#include "mmhub_v2_3.h"  #include "athub_v2_0.h"  #include "athub_v2_1.h" @@ -93,44 +94,72 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,  				       struct amdgpu_irq_src *source,  				       struct amdgpu_iv_entry *entry)  { +	bool retry_fault = !!(entry->src_data[1] & 0x80);  	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; +	struct amdgpu_task_info task_info;  	uint32_t status = 0;  	u64 addr;  	addr = (u64)entry->src_data[0] << 12;  	addr |= ((u64)entry->src_data[1] & 0xf) << 44; +	if (retry_fault) { +		/* Returning 1 here also prevents sending the IV to the KFD */ + +		/* Process it only if it's the first fault for this address */ +		if (entry->ih != &adev->irq.ih_soft && +		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid, +					     entry->timestamp)) +			return 1; + +		/* Delegate it to a different ring if the hardware hasn't +		 * already done it. +		 */ +		if (in_interrupt()) { +			amdgpu_irq_delegate(adev, entry, 8); +			return 1; +		} + +		/* Try to handle the recoverable page faults by filling page +		 * tables +		 */ +		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) +			return 1; +	} +  	if (!amdgpu_sriov_vf(adev)) {  		/*  		 * Issue a dummy read to wait for the status register to  		 * be updated to avoid reading an incorrect value due to  		 * the new fast GRBM interface.  		 */ -		if (entry->vmid_src == AMDGPU_GFXHUB_0) +		if ((entry->vmid_src == AMDGPU_GFXHUB_0) && +		    (adev->asic_type < CHIP_SIENNA_CICHLID))  			RREG32(hub->vm_l2_pro_fault_status);  		status = RREG32(hub->vm_l2_pro_fault_status);  		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);  	} -	if (printk_ratelimit()) { -		struct amdgpu_task_info task_info; - -		memset(&task_info, 0, sizeof(struct amdgpu_task_info)); -		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - -		dev_err(adev->dev, -			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " -			"for process %s pid %d thread %s pid %d)\n", -			entry->vmid_src ? "mmhub" : "gfxhub", -			entry->src_id, entry->ring_id, entry->vmid, -			entry->pasid, task_info.process_name, task_info.tgid, -			task_info.task_name, task_info.pid); -		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n", -			addr, entry->client_id); -		if (!amdgpu_sriov_vf(adev)) -			hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); -	} +	if (!printk_ratelimit()) +		return 0; + +	memset(&task_info, 0, sizeof(struct amdgpu_task_info)); +	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + +	dev_err(adev->dev, +		"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " +		"for process %s pid %d thread %s pid %d)\n", +		entry->vmid_src ? "mmhub" : "gfxhub", +		entry->src_id, entry->ring_id, entry->vmid, +		entry->pasid, task_info.process_name, task_info.tgid, +		task_info.task_name, task_info.pid); +	dev_err(adev->dev, "  in page starting at address 0x%012llx from client %d\n", +		addr, entry->client_id); + +	if (!amdgpu_sriov_vf(adev)) +		hub->vmhub_funcs->print_l2_protection_fault_status(adev, +								   status);  	return 0;  }
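[Editor's note] The retry-fault block just added to gmc_v10_0_process_interrupt() (gmc_v9_0.c gets an identical block further down) encodes a strict triage order: deduplicate first, get out of hard-IRQ context second, and only then touch page tables, which can sleep. Condensed into a hypothetical helper for illustration; the patch itself open-codes these checks at the top of each handler:

/* Returning 1 consumes the fault and keeps the IV away from the KFD. */
static int example_handle_retry_fault(struct amdgpu_device *adev,
				      struct amdgpu_iv_entry *entry,
				      u64 addr)
{
	/* Only the first fault for this address is worth processing;
	 * entries already re-queued on the soft IH ring skip the filter. */
	if (entry->ih != &adev->irq.ih_soft &&
	    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
				     entry->timestamp))
		return 1;

	/* Filling page tables can sleep, so delegate the entry to the
	 * soft IH ring while still in interrupt context. */
	if (in_interrupt()) {
		amdgpu_irq_delegate(adev, entry, 8);
		return 1;
	}

	/* Try to resolve the recoverable fault by filling page tables. */
	if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1;

	return 0; /* not handled: fall through to the fault dump */
}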
"mmhub" : "gfxhub", +		entry->src_id, entry->ring_id, entry->vmid, +		entry->pasid, task_info.process_name, task_info.tgid, +		task_info.task_name, task_info.pid); +	dev_err(adev->dev, "  in page starting at address 0x%012llx from client %d\n", +		addr, entry->client_id); + +	if (!amdgpu_sriov_vf(adev)) +		hub->vmhub_funcs->print_l2_protection_fault_status(adev, +								   status);  	return 0;  } @@ -145,7 +174,7 @@ static const struct amdgpu_irq_src_funcs gmc_v10_0_ecc_funcs = {  	.process = amdgpu_umc_process_ecc_irq,  }; - static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) +static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)  {  	adev->gmc.vm_fault.num_types = 1;  	adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs; @@ -231,7 +260,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,  	 * Issue a dummy read to wait for the ACK register to be cleared  	 * to avoid a false ACK due to the new fast GRBM interface.  	 */ -	if (vmhub == AMDGPU_GFXHUB_0) +	if ((vmhub == AMDGPU_GFXHUB_0) && +	    (adev->asic_type < CHIP_SIENNA_CICHLID))  		RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng);  	/* Wait for ACK with a delay.*/ @@ -267,6 +297,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,   *   * @adev: amdgpu_device pointer   * @vmid: vm instance to flush + * @vmhub: vmhub type + * @flush_type: the flush type   *   * Flush the TLB for the requested page table.   */ @@ -359,6 +391,8 @@ error_alloc:   *   * @adev: amdgpu_device pointer   * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()   *   * Flush the TLB for the requested pasid.   */ @@ -398,7 +432,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,  		return 0;  	} -	for (vmid = 1; vmid < 16; vmid++) { +	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {  		ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,  				&queried_pasid); @@ -483,7 +517,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid  /*   * PTE format on NAVI 10:   * 63:59 reserved - * 58:57 reserved + * 58 reserved and for sienna_cichlid is used for MALL noalloc + * 57 reserved   * 56 F   * 55 L   * 54 reserved @@ -631,7 +666,14 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)  static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)  { -	adev->mmhub.funcs = &mmhub_v2_0_funcs; +	switch (adev->asic_type) { +	case CHIP_VANGOGH: +		adev->mmhub.funcs = &mmhub_v2_3_funcs; +		break; +	default: +		adev->mmhub.funcs = &mmhub_v2_0_funcs; +		break; +	}  }  static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) @@ -639,6 +681,8 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->gfxhub.funcs = &gfxhub_v2_1_funcs;  		break;  	default: @@ -673,8 +717,6 @@ static int gmc_v10_0_late_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	int r; -	amdgpu_bo_late_init(adev); -  	r = amdgpu_gmc_allocate_vm_inv_eng(adev);  	if (r)  		return r; @@ -698,6 +740,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,  	amdgpu_gmc_vram_location(adev, &adev->gmc, base);  	amdgpu_gmc_gart_location(adev, mc); +	amdgpu_gmc_agp_location(adev, mc);  	/* base offset of vram pages */  	adev->vm_manager.vram_base_offset = 
adev->gfxhub.funcs->get_mc_fb_offset(adev); @@ -733,6 +776,13 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)  	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);  	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 +	if (adev->flags & AMD_IS_APU) { +		adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); +		adev->gmc.aper_size = adev->gmc.real_vram_size; +	} +#endif +  	/* In case the PCI BAR is larger than the actual amount of vram */  	adev->gmc.visible_vram_size = adev->gmc.aper_size;  	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) @@ -746,6 +796,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)  		case CHIP_NAVI12:  		case CHIP_SIENNA_CICHLID:  		case CHIP_NAVY_FLOUNDER: +		case CHIP_VANGOGH: +		case CHIP_DIMGREY_CAVEFISH:  		default:  			adev->gmc.gart_size = 512ULL << 20;  			break; @@ -790,7 +842,10 @@ static int gmc_v10_0_sw_init(void *handle)  	spin_lock_init(&adev->gmc.invalidate_lock); -	if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) { +	if ((adev->flags & AMD_IS_APU) && amdgpu_emu_mode == 1) { +		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; +		adev->gmc.vram_width = 64; +	} else if (amdgpu_emu_mode == 1) {  		adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;  		adev->gmc.vram_width = 1 * 128; /* numchan * chansize */  	} else { @@ -808,6 +863,8 @@ static int gmc_v10_0_sw_init(void *handle)  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		adev->num_vmhubs = 2;  		/*  		 * To fulfill 4-level page support, @@ -921,6 +978,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		break;  	default:  		break; @@ -1081,8 +1140,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle,  	if (r)  		return r; -	if (adev->asic_type == CHIP_SIENNA_CICHLID || -	    adev->asic_type == CHIP_NAVY_FLOUNDER) +	if (adev->asic_type >= CHIP_SIENNA_CICHLID && +	    adev->asic_type <= CHIP_DIMGREY_CAVEFISH)  		return athub_v2_1_set_clockgating(adev, state);  	else  		return athub_v2_0_set_clockgating(adev, state); @@ -1094,8 +1153,8 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)  	adev->mmhub.funcs->get_clockgating(adev, flags); -	if (adev->asic_type == CHIP_SIENNA_CICHLID || -	    adev->asic_type == CHIP_NAVY_FLOUNDER) +	if (adev->asic_type >= CHIP_SIENNA_CICHLID && +	    adev->asic_type <= CHIP_DIMGREY_CAVEFISH)  		athub_v2_1_get_clockgating(adev, flags);  	else  		athub_v2_0_get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 95a9117e9564..f5b69484c45a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -530,7 +530,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)  	 * the VMs are determined by the application and setup and assigned  	 * on the fly in the vm part of radeon_gart.c  	 */ -	for (i = 1; i < 16; i++) { +	for (i = 1; i < AMDGPU_NUM_VMID; i++) {  		if (i < 8)  			WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,  			       table_addr >> 12); @@ -791,8 +791,6 @@ static int gmc_v6_0_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	amdgpu_bo_late_init(adev); -  	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)  		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 
0);  	else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80c146df338a..dee2b34effb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -424,6 +424,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)   *   * @adev: amdgpu_device pointer   * @pasid: pasid to be flushed + * @flush_type: type of flush + * @all_hub: flush all hubs   *   * Flush the TLB for the requested pasid.   */ @@ -463,7 +465,9 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,   *   * @adev: amdgpu_device pointer   * @vmid: vm instance to flush - * + * @vmhub: which hub to flush + * @flush_type: type of flush + * *   * Flush the TLB for the requested page table (CIK).   */  static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, @@ -673,7 +677,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)  	/* set vm size, must be a multiple of 4 */  	WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);  	WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1); -	for (i = 1; i < 16; i++) { +	for (i = 1; i < AMDGPU_NUM_VMID; i++) {  		if (i < 8)  			WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,  			       table_addr >> 12); @@ -763,6 +767,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)   * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value   * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value   * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use   *   * Print human readable fault information (CIK).   */ @@ -956,8 +961,6 @@ static int gmc_v7_0_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	amdgpu_bo_late_init(adev); -  	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)  		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);  	else 
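[Editor's note] The gmc_v8_0.c hunk that follows collapses three open-coded PCI device/revision ladders into the ASICID_IS_P21/P23/P30/P31 kicker predicates. The resulting selection pattern, sketched as a hypothetical helper (the patch keeps the ternary inline in gmc_v8_0_init_microcode):

/* Kicker boards need the "_k" firmware variant; the predicates take the
 * raw PCI device and revision IDs. A sketch assuming the amdgpu.h context. */
static const char *example_polaris10_fw_name(struct amdgpu_device *adev)
{
	return ASICID_IS_P30(adev->pdev->device, adev->pdev->revision) ?
		"polaris10_k" : "polaris10";
}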
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9ab65ca7df77..2d832fc23119 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -230,36 +230,20 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)  		chip_name = "tonga";  		break;  	case CHIP_POLARIS11: -		if (((adev->pdev->device == 0x67ef) && -		     ((adev->pdev->revision == 0xe0) || -		      (adev->pdev->revision == 0xe5))) || -		    ((adev->pdev->device == 0x67ff) && -		     ((adev->pdev->revision == 0xcf) || -		      (adev->pdev->revision == 0xef) || -		      (adev->pdev->revision == 0xff)))) -			chip_name = "polaris11_k"; -		else if ((adev->pdev->device == 0x67ef) && -			 (adev->pdev->revision == 0xe2)) +		if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision) || +		    ASICID_IS_P31(adev->pdev->device, adev->pdev->revision))  			chip_name = "polaris11_k";  		else  			chip_name = "polaris11";  		break;  	case CHIP_POLARIS10: -		if ((adev->pdev->device == 0x67df) && -		    ((adev->pdev->revision == 0xe1) || -		     (adev->pdev->revision == 0xf7))) +		if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision))  			chip_name = "polaris10_k";  		else  			chip_name = "polaris10";  		break;  	case CHIP_POLARIS12: -		if (((adev->pdev->device == 0x6987) && -		     ((adev->pdev->revision == 0xc0) || -		      (adev->pdev->revision == 0xc3))) || -		    ((adev->pdev->device == 0x6981) && -		     ((adev->pdev->revision == 0x00) || -		      (adev->pdev->revision == 0x01) || -		      (adev->pdev->revision == 0x10)))) +		if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision))  			chip_name = "polaris12_k";  		else  			chip_name = "polaris12"; @@ -625,6 +609,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)   *   * @adev: amdgpu_device pointer   * @pasid: pasid to be flushed + * @flush_type: type of flush + * @all_hub: flush all hubs   *   * Flush the TLB for the requested pasid.   */ @@ -665,6 +651,8 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,   *   * @adev: amdgpu_device pointer   * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: type of flush   *   * Flush the TLB for the requested page table (VI).   */ @@ -915,7 +903,7 @@ ... -	for (i = 1; i < 16; i++) { +	for (i = 1; i < AMDGPU_NUM_VMID; i++) {  		if (i < 8)  			WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,  			       table_addr >> 12); @@ -1006,6 +994,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)   * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value   * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value   * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use   *   * Print human readable fault information (VI).   */ @@ -1073,8 +1062,6 @@ static int gmc_v8_0_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	amdgpu_bo_late_init(adev); -  	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)  		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);  	else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3ebbddb63705..e22268f9dba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -379,41 +379,6 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {  	(0x001d43e0 + 0x00001800),  }; -static const uint32_t ecc_umc_mcumc_status_addrs[] = { -	(0x000143c2 + 0x00000000), -	(0x000143c2 + 0x00000800), -	(0x000143c2 + 0x00001000), -	(0x000143c2 + 0x00001800), -	(0x000543c2 + 0x00000000), -	(0x000543c2 + 0x00000800), -	(0x000543c2 + 0x00001000), -	(0x000543c2 + 0x00001800), -	(0x000943c2 + 0x00000000), -	(0x000943c2 + 0x00000800), -	(0x000943c2 + 0x00001000), -	(0x000943c2 + 0x00001800), -	(0x000d43c2 + 0x00000000), -	(0x000d43c2 + 0x00000800), -	(0x000d43c2 + 0x00001000), -	(0x000d43c2 + 0x00001800), -	(0x001143c2 + 0x00000000), -	(0x001143c2 + 0x00000800), -	(0x001143c2 + 0x00001000), -	(0x001143c2 + 0x00001800), -	(0x001543c2 + 0x00000000), -	(0x001543c2 + 0x00000800), -	(0x001543c2 + 0x00001000), -	(0x001543c2 + 0x00001800), -	(0x001943c2 + 0x00000000), -	(0x001943c2 + 0x00000800), -	(0x001943c2 + 0x00001000), -	(0x001943c2 + 0x00001800), -	(0x001d43c2 + 0x00000000), -	(0x001d43c2 + 0x00000800), -	(0x001d43c2 + 0x00001000), -	(0x001d43c2 + 0x00001800), -}; -  static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,  		struct amdgpu_irq_src *src,  		unsigned type, @@ -502,6 +467,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,  				WREG32(reg, tmp);  			}  		} +		break;  	default:  		break;  	} 
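[Editor's note] The lone `break;` added to gmc_v9_0_vm_fault_interrupt_state() above (gfx_v9_0_set_priv_inst_fault_state gets the same treatment earlier) changes no behavior, since the default case is empty, but it makes the end of the case explicit, which is what the kernel's -Wimplicit-fallthrough checking expects. Reduced to a hypothetical skeleton:

static int example_set_fault_state(struct amdgpu_device *adev,
				   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_ENABLE:
		/* program the fault-enable bits here */
		break;	/* previously missing: control fell into default */
	default:
		break;
	}
	return 0;
}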
@@ -510,122 +476,136 @@  }  static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, -				struct amdgpu_irq_src *source, -				struct amdgpu_iv_entry *entry) +				      struct amdgpu_irq_src *source, +				      struct amdgpu_iv_entry *entry)  { -	struct amdgpu_vmhub *hub;  	bool retry_fault = !!(entry->src_data[1] & 0x80);  	uint32_t status = 0, cid = 0, rw = 0; -	u64 addr; -	char hub_name[10]; +	struct amdgpu_task_info task_info; +	struct amdgpu_vmhub *hub;  	const char *mmhub_cid; +	const char *hub_name; +	u64 addr;  	addr = (u64)entry->src_data[0] << 12;  	addr |= ((u64)entry->src_data[1] & 0xf) << 44; -	if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, -						    entry->timestamp)) -		return 1; /* This also prevents sending it to KFD */ +	if (retry_fault) { +		/* Returning 1 here also prevents sending the IV to the KFD */ + +		/* Process it only if it's the first fault for this address */ +		if (entry->ih != &adev->irq.ih_soft && +		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid, +					     entry->timestamp)) +			return 1; + +		/* Delegate it to a different ring if the hardware hasn't +		 * already done it. +		 */ +		if (in_interrupt()) { +			amdgpu_irq_delegate(adev, entry, 8); +			return 1; +		} + +		/* Try to handle the recoverable page faults by filling page +		 * tables +		 */ +		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) +			return 1; +	} + +	if (!printk_ratelimit()) +		return 0;  	if (entry->client_id == SOC15_IH_CLIENTID_VMC) { -		snprintf(hub_name, sizeof(hub_name), "mmhub0"); +		hub_name = "mmhub0";  		hub = &adev->vmhub[AMDGPU_MMHUB_0];  	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { -		snprintf(hub_name, sizeof(hub_name), "mmhub1"); +		hub_name = "mmhub1";  		hub = &adev->vmhub[AMDGPU_MMHUB_1];  	} else { -		snprintf(hub_name, sizeof(hub_name), "gfxhub0"); +		hub_name = "gfxhub0";  		hub = &adev->vmhub[AMDGPU_GFXHUB_0];  	} -	/* If it's the first fault for this address, process it normally */ -	if (retry_fault && !in_interrupt() && -	    amdgpu_vm_handle_fault(adev, entry->pasid, addr)) -		return 1; /* This also prevents sending it to KFD */ +	memset(&task_info, 0, sizeof(struct amdgpu_task_info)); +	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); -	if (!amdgpu_sriov_vf(adev)) { -		/* -		 * Issue a dummy read to wait for the status register to -		 * be updated to avoid reading an incorrect value due to -		 * the new fast GRBM interface. -		 */ -		if (entry->vmid_src == AMDGPU_GFXHUB_0) -			RREG32(hub->vm_l2_pro_fault_status); - -		status = RREG32(hub->vm_l2_pro_fault_status); -		cid = REG_GET_FIELD(status, -				    VM_L2_PROTECTION_FAULT_STATUS, CID); -		rw = REG_GET_FIELD(status, -				   VM_L2_PROTECTION_FAULT_STATUS, RW); -		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); -	} +	dev_err(adev->dev, +		"[%s] %s page fault (src_id:%u ring:%u vmid:%u " +		"pasid:%u, for process %s pid %d thread %s pid %d)\n", +		hub_name, retry_fault ? 
"retry" : "no-retry", -			entry->src_id, entry->ring_id, entry->vmid, -			entry->pasid, task_info.process_name, task_info.tgid, -			task_info.task_name, task_info.pid); -		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n", -			addr, entry->client_id); -		if (!amdgpu_sriov_vf(adev)) { -			dev_err(adev->dev, -				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", -				status); -			if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { -				dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", -					cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], -					cid); -			} else { -				switch (adev->asic_type) { -				case CHIP_VEGA10: -					mmhub_cid = mmhub_client_ids_vega10[cid][rw]; -					break; -				case CHIP_VEGA12: -					mmhub_cid = mmhub_client_ids_vega12[cid][rw]; -					break; -				case CHIP_VEGA20: -					mmhub_cid = mmhub_client_ids_vega20[cid][rw]; -					break; -				case CHIP_ARCTURUS: -					mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; -					break; -				case CHIP_RAVEN: -					mmhub_cid = mmhub_client_ids_raven[cid][rw]; -					break; -				case CHIP_RENOIR: -					mmhub_cid = mmhub_client_ids_renoir[cid][rw]; -					break; -				default: -					mmhub_cid = NULL; -					break; -				} -				dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", -					mmhub_cid ? mmhub_cid : "unknown", cid); -			} -			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", -				REG_GET_FIELD(status, -				VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); -			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", -				REG_GET_FIELD(status, -				VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); -			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", -				REG_GET_FIELD(status, -				VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); -			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", -				REG_GET_FIELD(status, -				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); -			dev_err(adev->dev, "\t RW: 0x%x\n", rw); +	if (amdgpu_sriov_vf(adev)) +		return 0; + +	/* +	 * Issue a dummy read to wait for the status register to +	 * be updated to avoid reading an incorrect value due to +	 * the new fast GRBM interface. +	 */ +	if (entry->vmid_src == AMDGPU_GFXHUB_0) +		RREG32(hub->vm_l2_pro_fault_status); + +	status = RREG32(hub->vm_l2_pro_fault_status); +	cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); +	rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); +	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + +	dev_err(adev->dev, +		"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", +		status); +	if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { +		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", +			cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : +			gfxhub_client_ids[cid], +			cid); +	} else { +		switch (adev->asic_type) { +		case CHIP_VEGA10: +			mmhub_cid = mmhub_client_ids_vega10[cid][rw]; +			break; +		case CHIP_VEGA12: +			mmhub_cid = mmhub_client_ids_vega12[cid][rw]; +			break; +		case CHIP_VEGA20: +			mmhub_cid = mmhub_client_ids_vega20[cid][rw]; +			break; +		case CHIP_ARCTURUS: +			mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; +			break; +		case CHIP_RAVEN: +			mmhub_cid = mmhub_client_ids_raven[cid][rw]; +			break; +		case CHIP_RENOIR: +			mmhub_cid = mmhub_client_ids_renoir[cid][rw]; +			break; +		default: +			mmhub_cid = NULL; +			break;  		} +		dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", +			mmhub_cid ? 
mmhub_cid : "unknown", cid);  	} - +	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", +		REG_GET_FIELD(status, +		VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); +	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", +		REG_GET_FIELD(status, +		VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); +	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", +		REG_GET_FIELD(status, +		VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); +	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", +		REG_GET_FIELD(status, +		VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); +	dev_err(adev->dev, "\t RW: 0x%x\n", rw);  	return 0;  } @@ -711,6 +691,7 @@ static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,   *   * @adev: amdgpu_device pointer   * @vmid: vm instance to flush + * @vmhub: which hub to flush   * @flush_type: the flush type   *   * Flush the TLB for the requested page table using certain type. @@ -827,6 +808,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,   *   * @adev: amdgpu_device pointer   * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: flush all hubs   *   * Flush the TLB for the requested pasid.   */ @@ -1166,15 +1149,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)  static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)  { -	switch (adev->asic_type) { -	case CHIP_ARCTURUS: -	case CHIP_VEGA20: -		adev->gfxhub.funcs = &gfxhub_v1_1_funcs; -		break; -	default: -		adev->gfxhub.funcs = &gfxhub_v1_0_funcs; -		break; -	} +	adev->gfxhub.funcs = &gfxhub_v1_0_funcs;  }  static int gmc_v9_0_early_init(void *handle) @@ -1202,8 +1177,6 @@ static int gmc_v9_0_late_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	int r; -	amdgpu_bo_late_init(adev); -  	r = amdgpu_gmc_allocate_vm_inv_eng(adev);  	if (r)  		return r; @@ -1604,13 +1577,10 @@ static int gmc_v9_0_hw_init(void *handle)  	gmc_v9_0_init_golden_registers(adev);  	if (adev->mode_info.num_crtc) { -		if (adev->asic_type != CHIP_ARCTURUS) { -			/* Lockout access through VGA aperture*/ -			WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); - -			/* disable VGA render */ -			WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); -		} +		/* Lockout access through VGA aperture*/ +		WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); +		/* disable VGA render */ +		WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);  	}  	amdgpu_device_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index a13dd9a51149..37d8b6ca4dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -179,6 +179,7 @@ static void iceland_ih_irq_disable(struct amdgpu_device *adev)   * iceland_ih_get_wptr - get the IH ring buffer wptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr   *   * Get the IH ring buffer wptr from either the register   * or the writeback memory buffer (VI).  Also check for @@ -213,6 +214,8 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev,   * iceland_ih_decode_iv - decode an interrupt vector   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into   *   * Decodes the interrupt vector at the current rptr   * position and also advance the position. 
@@ -245,6 +248,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,   * iceland_ih_set_rptr - set the IH ring buffer rptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr   *   * Set the IH ring buffer rptr.   */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index c600b61b5f45..7332a320ede8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -210,7 +210,9 @@ static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring)   * jpeg_v1_0_decode_ring_emit_fence - emit a fence & trap command   *   * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Write a fence and a trap command to the ring.   */ @@ -282,7 +284,9 @@ static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,   * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer   *   * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from   * @ib: indirect buffer to execute + * @flags: unused   *   * Write ring commands to execute the indirect buffer.   */ @@ -511,6 +515,7 @@ void jpeg_v1_0_sw_fini(void *handle)   * jpeg_v1_0_start - start JPEG block   *   * @adev: amdgpu_device pointer + * @mode: SPG or DPG mode   *   * Setup and start the JPEG block   */ 
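[Editor's note] The @addr/@seq/@flags triple now documented on the JPEG fence emitters maps onto what such a callback does: write the 64-bit @seq to GPU address @addr, optionally raising an interrupt for waiters. A schematic, engine-neutral sketch (packet encodings omitted; real emitters frame each word in engine-specific commands):

static void example_emit_fence(struct amdgpu_ring *ring, u64 addr,
			       u64 seq, unsigned int flags)
{
	/* engine-specific packets would frame these data words */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

	/* a trap/interrupt wakes fence waiters when requested */
	if (flags & AMDGPU_FENCE_FLAG_INT)
		amdgpu_ring_write(ring, 0 /* engine-specific trap cmd */);
}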
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 94caf5204c8b..3b22953aa62e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -32,19 +32,19 @@  #include "vcn/vcn_2_0_0_sh_mask.h"  #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" -#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 			0x1bfff +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET				0x1bfff  #define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET				0x4029  #define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET				0x402a  #define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET				0x402b  #define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ea -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40eb +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x40eb  #define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET				0x40cf  #define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET				0x40d1 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 		0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET			0x40e8  #define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x40e9  #define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET				0x4082  #define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET		0x40ec -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x40ed +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x40ed  #define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET			0x4085  #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET				0x4084  #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET				0x4089 @@ -247,7 +247,7 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev)  	return 0;  } -static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) +static int jpeg_v2_0_enable_power_gating(struct amdgpu_device *adev)  {  	if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) {  		uint32_t data; @@ -274,7 +274,7 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev)  	return 0;  } -static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device *adev)  {  	uint32_t data; @@ -297,7 +297,7 @@ static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev)  	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data);  } -static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev)  {  	uint32_t data; @@ -489,7 +489,9 @@ void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)   * jpeg_v2_0_dec_ring_emit_fence - emit a fence & trap command   *   * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Write a fence and a trap command to the ring.   */ @@ -538,7 +540,9 @@ void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,   * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer   *   * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from   * @ib: indirect buffer to execute + * @flags: unused   *   * Write ring commands to execute the indirect buffer.   */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 845306f63cdb..c6724a0e0c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -247,7 +247,7 @@ static int jpeg_v2_5_resume(void *handle)  	return r;  } -static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst) +static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device *adev, int inst)  {  	uint32_t data; @@ -276,7 +276,7 @@ static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst)  	WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data);  } -static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst) +static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst)  {  	uint32_t data; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 3a0dff53654d..e8fbb2a0de34 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -213,7 +213,7 @@ static int jpeg_v3_0_resume(void *handle)  	return r;  } -static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device *adev)  {  	uint32_t data = 0; @@ -243,7 +243,7 @@ static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev)  	WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data);  } -static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device *adev)  {  	uint32_t data = 0; @@ -286,7 +286,7 @@ static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev)  	return 0;  } -static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device* adev) +static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev)  {  	/* enable anti hang mechanism */  	WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 1c22d8393b21..985e454463e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -46,7 +46,7 @@ static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring)  	struct amdgpu_device *adev = ring->adev;  	if (ring->use_doorbell) { -		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], +		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs],  			     ring->wptr);  		WDOORBELL64(ring->doorbell_index, ring->wptr);  	} else { diff --git 
a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index f84701c562bf..d7b39c07de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -344,7 +344,7 @@ static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)  	u32 i;  	/* Disable all tables */ -	for (i = 0; i < 16; i++) +	for (i = 0; i < AMDGPU_NUM_VMID; i++)  		WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL,  				    i * hub->ctx_distance, 0); @@ -409,7 +409,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool  				CRASH_ON_NO_RETRY_FAULT, 1);  		tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,  				CRASH_ON_RETRY_FAULT, 1); -    } +	}  	WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);  } @@ -712,7 +712,7 @@ static int mmhub_v1_0_get_ras_error_count(struct amdgpu_device *adev,  	uint32_t sec_cnt, ded_cnt;  	for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) { -		if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset) +		if (mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset)  			continue;  		sec_cnt = (value & diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 2063700f0bc6..f107385faba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -136,6 +136,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw];  		break;  	default: @@ -195,17 +196,17 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)  	uint64_t value;  	uint32_t tmp; -	/* Disable AGP. */ +	/* Program the AGP BAR */  	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); -	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); -	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);  	if (!amdgpu_sriov_vf(adev)) {  		/* Program the system aperture low logical page number. */  		WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, -			     adev->gmc.vram_start >> 18); +			     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);  		WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, -			     adev->gmc.vram_end >> 18); +			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);  	}  	/* Set default page address. 
*/ @@ -421,7 +422,7 @@ static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev)  	u32 i;  	/* Disable all tables */ -	for (i = 0; i < 16; i++) +	for (i = 0; i < AMDGPU_NUM_VMID; i++)  		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL,  				    i * hub->ctx_distance, 0); @@ -543,6 +544,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);  		def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);  		break; @@ -576,6 +578,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		if (def != data)  			WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);  		if (def1 != data1) @@ -598,6 +601,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);  		break;  	default: @@ -614,6 +618,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade  		switch (adev->asic_type) {  		case CHIP_SIENNA_CICHLID:  		case CHIP_NAVY_FLOUNDER: +		case CHIP_DIMGREY_CAVEFISH:  			WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);  			break;  		default: @@ -635,6 +640,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		mmhub_v2_0_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE);  		mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -657,6 +663,7 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);  		data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);  		break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c new file mode 100644 index 000000000000..b72c8e4ca36b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -0,0 +1,589 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v2_3.h" + +#include "mmhub/mmhub_2_3_0_offset.h" +#include "mmhub/mmhub_2_3_0_sh_mask.h" +#include "mmhub/mmhub_2_3_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static const char *mmhub_client_ids_vangogh[][2] = { +	[0][0] = "MP0", +	[1][0] = "MP1", +	[2][0] = "DCEDMC", +	[3][0] = "DCEVGA", +	[13][0] = "UTCL2", +	[26][0] = "OSS", +	[27][0] = "HDP", +	[28][0] = "VCN", +	[29][0] = "VCNU", +	[30][0] = "JPEG", +	[0][1] = "MP0", +	[1][1] = "MP1", +	[2][1] = "DCEDMC", +	[3][1] = "DCEVGA", +	[4][1] = "DCEDWB", +	[5][1] = "XDP", +	[26][1] = "OSS", +	[27][1] = "HDP", +	[28][1] = "VCN", +	[29][1] = "VCNU", +	[30][1] = "JPEG", +}; + +static uint32_t mmhub_v2_3_get_invalidate_req(unsigned int vmid, +					      uint32_t flush_type) +{ +	u32 req = 0; + +	/* invalidate using legacy mode on vmid*/ +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, +			    PER_VMID_INVALIDATE_REQ, 1 << vmid); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); +	req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, +			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0); + +	return req; +} + +static void +mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, +					     uint32_t status) +{ +	uint32_t cid, rw; +	const char *mmhub_cid = NULL; + +	cid = REG_GET_FIELD(status, +			    MMVM_L2_PROTECTION_FAULT_STATUS, CID); +	rw = REG_GET_FIELD(status, +			   MMVM_L2_PROTECTION_FAULT_STATUS, RW); + +	dev_err(adev->dev, +		"MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", +		status); +	switch (adev->asic_type) { +	case CHIP_VANGOGH: +		mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; +		break; +	default: +		mmhub_cid = NULL; +		break; +	} +	dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", +		mmhub_cid ? 
mmhub_cid : "unknown", cid); +	dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", +		REG_GET_FIELD(status, +		MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); +	dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", +		REG_GET_FIELD(status, +		MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); +	dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", +		REG_GET_FIELD(status, +		MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); +	dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", +		REG_GET_FIELD(status, +		MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); +	dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, +					uint32_t vmid, +					uint64_t page_table_base) +{ +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + +	WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, +			    hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); + +	WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, +			    hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); +} + +static void mmhub_v2_3_init_gart_aperture_regs(struct amdgpu_device *adev) +{ +	uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + +	mmhub_v2_3_setup_vm_pt_regs(adev, 0, pt_base); + +	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, +		     (u32)(adev->gmc.gart_start >> 12)); +	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, +		     (u32)(adev->gmc.gart_start >> 44)); + +	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, +		     (u32)(adev->gmc.gart_end >> 12)); +	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, +		     (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev) +{ +	uint64_t value; +	uint32_t tmp; + +	/* Disable AGP. */ +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + +	/* Program the system aperture low logical page number. */ +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, +		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, +		     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + +	/* Set default page address. */ +	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + +		adev->vm_manager.vram_base_offset; +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, +		     (u32)(value >> 12)); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, +		     (u32)(value >> 44)); + +	/* Program "protection fault". 
*/ +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, +		     (u32)(adev->dummy_page_addr >> 12)); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, +		     (u32)((u64)adev->dummy_page_addr >> 44)); + +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, +			    ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) +{ +	uint32_t tmp; + +	/* Setup TLB control */ +	tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, +			    ENABLE_ADVANCED_DRIVER_MODEL, 1); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, +			    SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, +			    MTYPE, MTYPE_UC); /* UC, uncached */ + +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev) +{ +	uint32_t tmp; + +	/* Setup L2 cache */ +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, +			    ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); +	/* XXX for emulation, Refer to closed source code.*/ +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, +			    0); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); + +	tmp = mmMMVM_L2_CNTL3_DEFAULT; +	if (adev->gmc.translate_further) { +		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); +		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 9); +	} else { +		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); +		tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, +				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6); +	} +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); + +	tmp = mmMMVM_L2_CNTL4_DEFAULT; +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp); + +	tmp = mmMMVM_L2_CNTL5_DEFAULT; +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) +{ +	uint32_t tmp; + +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); +	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); +	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); +	tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, +			    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); +} + +static void 
mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev) +{ +	WREG32_SOC15(MMHUB, 0, +		     mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, +		     0xFFFFFFFF); +	WREG32_SOC15(MMHUB, 0, +		     mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, +		     0x0000000F); + +	WREG32_SOC15(MMHUB, 0, +		     mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); +	WREG32_SOC15(MMHUB, 0, +		     mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, +		     0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, +		     0); +} + +static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) +{ +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; +	int i; +	uint32_t tmp; + +	for (i = 0; i <= 14; i++) { +		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, +				    adev->vm_manager.num_level); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, +				    1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    PAGE_TABLE_BLOCK_SIZE, +				    adev->vm_manager.block_size - 9); +		/* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ +		tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, +				    RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, +				    !amdgpu_noretry); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, +				    i * hub->ctx_distance, tmp); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, +				    i * hub->ctx_addr_distance, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, +				    i * hub->ctx_addr_distance, 0); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, +				    i * hub->ctx_addr_distance, +				    lower_32_bits(adev->vm_manager.max_pfn - 1)); +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, +				    i * hub->ctx_addr_distance, +				    upper_32_bits(adev->vm_manager.max_pfn - 1)); +	} +} + +static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) +{ +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; +	unsigned i; + +	for (i = 0; i < 18; ++i) { +		WREG32_SOC15_OFFSET(MMHUB, 0, +				    mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, +				    i * hub->eng_addr_distance, 0xffffffff); +		WREG32_SOC15_OFFSET(MMHUB, 0, +				    mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, +				    i * hub->eng_addr_distance, 0x1f); +	} +} + +static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev) +{ +	if (amdgpu_sriov_vf(adev)) { +		/* +		 * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are +		 * VF copy registers so vbios post doesn't program them; the +		 * SRIOV driver needs to program them +		 */ +		WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE, +			     adev->gmc.vram_start >> 24); +		WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP, +			     adev->gmc.vram_end >> 24); +	} + +	/* GART Enable. */ +	mmhub_v2_3_init_gart_aperture_regs(adev); +	mmhub_v2_3_init_system_aperture_regs(adev); +	mmhub_v2_3_init_tlb_regs(adev); +	mmhub_v2_3_init_cache_regs(adev); + +	mmhub_v2_3_enable_system_domain(adev); +	mmhub_v2_3_disable_identity_aperture(adev); +	mmhub_v2_3_setup_vmid_config(adev); +	mmhub_v2_3_program_invalidation(adev); + +	return 0; +} + +static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) +{ +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; +	u32 tmp; +	u32 i; + +	/* Disable all tables */ +	for (i = 0; i < AMDGPU_NUM_VMID; i++) +		WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, +				    i * hub->ctx_distance, 0); + +	/* Setup TLB control */ +	tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); +	tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, +			    ENABLE_ADVANCED_DRIVER_MODEL, 0); +	WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); + +	/* Setup L2 cache */ +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v2_3_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev, +						bool value) +{ +	u32 tmp; +	tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, 
MMVM_L2_PROTECTION_FAULT_CNTL, +			    PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, +			    value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +			    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); +	if (!value) { +		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +				CRASH_ON_NO_RETRY_FAULT, 1); +		tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, +				CRASH_ON_RETRY_FAULT, 1); +	} +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = { +	.print_l2_protection_fault_status = mmhub_v2_3_print_l2_protection_fault_status, +	.get_invalidate_req = mmhub_v2_3_get_invalidate_req, +}; + +static void mmhub_v2_3_init(struct amdgpu_device *adev) +{ +	struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + +	hub->ctx0_ptb_addr_lo32 = +		SOC15_REG_OFFSET(MMHUB, 0, +				 mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); +	hub->ctx0_ptb_addr_hi32 = +		SOC15_REG_OFFSET(MMHUB, 0, +				 mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); +	hub->vm_inv_eng0_sem = +		SOC15_REG_OFFSET(MMHUB, 0, +				 mmMMVM_INVALIDATE_ENG0_SEM); +	hub->vm_inv_eng0_req = +		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); +	hub->vm_inv_eng0_ack = +		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK); +	hub->vm_context0_cntl = +		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); +	hub->vm_l2_pro_fault_status = +		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS); +	hub->vm_l2_pro_fault_cntl = +		SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + +	hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL; +	hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - +		mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; +	hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ - +		mmMMVM_INVALIDATE_ENG0_REQ; +	hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - +		mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + +	hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | +		MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + +	hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs; +} + +static void +mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, +					    bool enable) +{ +	uint32_t def, data, def1, data1; + +	def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); +	def1 = data1 = 
RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { +		data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + +		data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +		           DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +		           DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +		           DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +		           DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +		           DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + +	} else { +		data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + +		data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +			  DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +			  DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +			  DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +			  DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +			  DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); +	} + +	if (def != data) +		WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); +	if (def1 != data1) +		WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); +} + +static void +mmhub_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, +					   bool enable) +{ +	uint32_t def, data; + +	def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) +		data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; +	else +		data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + +	if (def != data) +		WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); +} + +static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, +				      enum amd_clockgating_state state) +{ +	if (amdgpu_sriov_vf(adev)) +		return 0; + +	mmhub_v2_3_update_medium_grain_clock_gating(adev, +			state == AMD_CG_STATE_GATE ? true : false); +	mmhub_v2_3_update_medium_grain_light_sleep(adev, +			state == AMD_CG_STATE_GATE ? true : false); + +	return 0; +} + +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ +	int data, data1; + +	if (amdgpu_sriov_vf(adev)) +		*flags = 0; + +	data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); +	data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + +	/* AMD_CG_SUPPORT_MC_MGCG */ +	if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && +	    !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | +		       DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) +		*flags |= AMD_CG_SUPPORT_MC_MGCG; + +	/* AMD_CG_SUPPORT_MC_LS */ +	if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) +		*flags |= AMD_CG_SUPPORT_MC_LS; +} + +const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs = { +	.ras_late_init = amdgpu_mmhub_ras_late_init, +	.init = mmhub_v2_3_init, +	.gart_enable = mmhub_v2_3_gart_enable, +	.set_fault_enable_default = mmhub_v2_3_set_fault_enable_default, +	.gart_disable = mmhub_v2_3_gart_disable, +	.set_clockgating = mmhub_v2_3_set_clockgating, +	.get_clockgating = mmhub_v2_3_get_clockgating, +	.setup_vm_pt_regs = mmhub_v2_3_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h new file mode 100644 index 000000000000..2926d21dea8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h @@ -0,0 +1,28 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V2_3_H__ +#define __MMHUB_V2_3_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 66748bb01b52..4a31737b6bb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -405,7 +405,7 @@ static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)  	for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {  		/* Disable all tables */ -		for (i = 0; i < 16; i++) +		for (i = 0; i < AMDGPU_NUM_VMID; i++)  			WREG32_SOC15_OFFSET(MMHUB, 0,  					    mmVML2VC0_VM_CONTEXT0_CNTL,  					    j * MMHUB_INSTANCE_REGISTER_OFFSET + diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index f5ce9a9f4cf5..7767ccca526b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,  static int xgpu_ai_request_reset(struct amdgpu_device *adev)  { -	return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +	int ret, i = 0; + +	while (i < AI_MAILBOX_POLL_MSG_REP_MAX) { +		ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +		if (!ret) +			break; +		i++; +	} + +	return ret;  }  static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 83b453f5d717..50572635d0f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -25,8 +25,9 @@  #define __MXGPU_AI_H__  #define AI_MAILBOX_POLL_ACK_TIMEDOUT	500 -#define AI_MAILBOX_POLL_MSG_TIMEDOUT	12000 +#define AI_MAILBOX_POLL_MSG_TIMEDOUT	6000  #define AI_MAILBOX_POLL_FLR_TIMEDOUT	5000 +#define AI_MAILBOX_POLL_MSG_REP_MAX	11  enum idh_request {  	IDH_REQ_GPU_INIT_ACCESS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 666ed99cc14b..dd5c1e6ce009 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,  static int xgpu_nv_request_reset(struct amdgpu_device *adev)  { -	return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +	int ret, i = 0; + +	while (i < NV_MAILBOX_POLL_MSG_REP_MAX) { +		ret = 
xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +		if (!ret) +			break; +		i++; +	} + +	return ret;  }  static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 52605e14a1a5..9f5808616174 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -27,6 +27,7 @@  #define NV_MAILBOX_POLL_ACK_TIMEDOUT	500  #define NV_MAILBOX_POLL_MSG_TIMEDOUT	6000  #define NV_MAILBOX_POLL_FLR_TIMEDOUT	5000 +#define NV_MAILBOX_POLL_MSG_REP_MAX	11  enum idh_request {  	IDH_REQ_GPU_INIT_ACCESS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 74b1e7dc49a9..7ba229e43799 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -136,6 +136,9 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev)  		}  		adev->irq.ih2.enabled = true;  	} + +	if (adev->irq.ih_soft.ring_size) +		adev->irq.ih_soft.enabled = true;  }  /** @@ -314,6 +317,8 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)  			switch (adev->asic_type) {  			case CHIP_SIENNA_CICHLID:  			case CHIP_NAVY_FLOUNDER: +			case CHIP_VANGOGH: +			case CHIP_DIMGREY_CAVEFISH:  				ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid);  				ih_chicken = REG_SET_FIELD(ih_chicken,  						IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); @@ -440,6 +445,7 @@ static void navi10_ih_irq_disable(struct amdgpu_device *adev)   * navi10_ih_get_wptr - get the IH ring buffer wptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr   *   * Get the IH ring buffer wptr from either the register   * or the writeback memory buffer (NAVI10).  Also check for @@ -500,6 +506,8 @@ out:   * navi10_ih_decode_iv - decode an interrupt vector   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into   *   * Decodes the interrupt vector at the current rptr   * position and also advance the position. @@ -543,6 +551,7 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev,   * navi10_ih_irq_rearm - rearm IRQ if lost   *   * @adev: amdgpu_device pointer + * @ih: IH ring to match   *   */  static void navi10_ih_irq_rearm(struct amdgpu_device *adev, @@ -576,6 +585,7 @@ static void navi10_ih_irq_rearm(struct amdgpu_device *adev,   *   * @adev: amdgpu_device pointer   * + * @ih: IH ring buffer to set rptr   * Set the IH ring buffer rptr.   */  static void navi10_ih_set_rptr(struct amdgpu_device *adev, @@ -660,8 +670,11 @@ static int navi10_ih_sw_init(void *handle)  	/* use gpu virtual address for ih ring  	 * until ih_checken is programmed to allow  	 * use bus address for ih ring by psp bl */ -	use_bus_addr = -		(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? 
false : true; +	if ((adev->flags & AMD_IS_APU) || +	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) +		use_bus_addr = false; +	else +		use_bus_addr = true;  	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);  	if (r)  		return r; @@ -690,6 +703,10 @@ static int navi10_ih_sw_init(void *handle)  					(adev->doorbell_index.ih + 2) << 1;  	} +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); +	if (r) +		return r; +  	r = amdgpu_irq_init(adev);  	return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 7429f30398b9..b5c3db16c2b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,10 +28,12 @@  #include "nbio/nbio_2_3_offset.h"  #include "nbio/nbio_2_3_sh_mask.h"  #include <uapi/linux/kfd_ioctl.h> +#include <linux/pci.h>  #define smnPCIE_CONFIG_CNTL	0x11180044  #define smnCPM_CONTROL		0x11180460  #define smnPCIE_CNTL2		0x11180070 +#define smnPCIE_LC_CNTL		0x11140280  #define mmBIF_SDMA2_DOORBELL_RANGE		0x01d6  #define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX	2 @@ -51,8 +53,17 @@ static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)  static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev)  { -	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); +	u32 tmp; +	/* +	 * guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, +	 * therefore we force rev_id to 0 (which is the default value) +	 */ +	if (amdgpu_sriov_vf(adev)) { +		return 0; +	} + +	tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);  tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;  tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -312,6 +323,42 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)  		WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);  } +#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT		0x00000000 // off by default, no gains over L1 +#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT		0x00000009 // 1=1us, 9=1ms +#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT	0x0000000E // 4ms + +static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, +				  bool enable) +{ +	uint32_t def, data; + +	def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + +	if (enable) { +		/* Disable ASPM L0s/L1 first */ +		data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + +		data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + +		if (pci_is_thunderbolt_attached(adev->pdev)) +			data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT  << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; +		else +			data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + +		data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; +	} else { +		/* Disable ASPM L1 */ +		data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; +		/* Disable ASPM TxL0s */ +		data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; +		/* Disable ACPI L1 */ +		data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; +	} + +	if (def != data) +		WREG32_PCIE(smnPCIE_LC_CNTL, data); +} +  const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {  	.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,  	.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -332,4 +379,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {  	.ih_control = nbio_v2_3_ih_control,  	.init_registers = nbio_v2_3_init_registers,  	.remap_hdp_registers = nbio_v2_3_remap_hdp_registers, +	.enable_aspm = nbio_v2_3_enable_aspm,  };
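For readers outside the driver, nbio_v2_3_enable_aspm() above is a typical instance of the def/data idiom these hunks use for nearly every register update: read the register once, edit a local copy, and write back only when the value actually changed, since PCIE/SMN accesses are comparatively slow. A minimal standalone sketch of the idiom follows; the SKETCH_* offset, masks and accessors are invented stand-ins, not real amdgpu symbols.

/*
 * Sketch of the def/data read-modify-write idiom. All SKETCH_*
 * names are hypothetical; in the driver the offset and field masks
 * come from the generated register headers.
 */
#include <stdint.h>

#define SKETCH_PCIE_LC_CNTL		0x11140280	/* hypothetical SMN offset */
#define SKETCH_LC_L0S_INACTIVITY_MASK	0x00000f00U	/* hypothetical field mask */
#define SKETCH_LC_L1_INACTIVITY_MASK	0x0000f000U	/* hypothetical field mask */

extern uint32_t sketch_rreg(uint32_t offset);			/* stands in for RREG32_PCIE() */
extern void sketch_wreg(uint32_t offset, uint32_t value);	/* stands in for WREG32_PCIE() */

static void sketch_enable_aspm(int enable)
{
	uint32_t def, data;

	/* One read; def remembers the original register value. */
	def = data = sketch_rreg(SKETCH_PCIE_LC_CNTL);

	if (enable)
		data |= SKETCH_LC_L1_INACTIVITY_MASK;		/* arm the L1 idle timer */
	else
		data &= ~(SKETCH_LC_L0S_INACTIVITY_MASK |
			  SKETCH_LC_L1_INACTIVITY_MASK);	/* force L0s and L1 off */

	/* Skip the write entirely when nothing changed. */
	if (def != data)
		sketch_wreg(SKETCH_PCIE_LC_CNTL, data);
}

diff --git 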
a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 7b2fb050407d..d2f1fe55d388 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -32,7 +32,7 @@  static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)  { -        u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); +	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);  	tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;  	tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -114,7 +114,7 @@ static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *ad  static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev,  					bool use_doorbell, int doorbell_index)  { -	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); +	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);  	if (use_doorbell) {  		ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index d34628e113fc..ae685813c419 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -43,7 +43,7 @@ static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)  static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)  { -        u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); +	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);  	tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;  	tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -126,7 +126,7 @@ static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *ad  static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev,  					bool use_doorbell, int doorbell_index)  { -	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); +	u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);  	if (use_doorbell) {  		ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c new file mode 100644 index 000000000000..aa36022670f9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -0,0 +1,341 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_2.h" + +#include "nbio/nbio_7_2_0_offset.h" +#include "nbio/nbio_7_2_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev) +{ +	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); +	WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, +		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) +{ +	u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + +	tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; +	tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + +	return tmp; +} + +static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ +	if (enable) +		WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, +			     BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | +			     BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); +	else +		WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static void nbio_v7_2_hdp_flush(struct amdgpu_device *adev, +				struct amdgpu_ring *ring) +{ +	if (!ring || !ring->funcs->emit_wreg) +		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +	else +		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev) +{ +	return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE); +} + +static void nbio_v7_2_sdma_doorbell_range(struct amdgpu_device *adev, int instance, +					  bool use_doorbell, int doorbell_index, +					  int doorbell_size) +{ +	u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE); +	u32 doorbell_range = RREG32_PCIE_PORT(reg); + +	if (use_doorbell) { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_SDMA0_DOORBELL_RANGE, +					       OFFSET, doorbell_index); +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_SDMA0_DOORBELL_RANGE, +					       SIZE, doorbell_size); +	} else { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_SDMA0_DOORBELL_RANGE, +					       SIZE, 0); +	} + +	WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_2_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, +					 int doorbell_index, int instance) +{ +	u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); +	u32 doorbell_range = RREG32_PCIE_PORT(reg); + +	if (use_doorbell) { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, +					       doorbell_index); +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); +	} else { +		doorbell_range = REG_SET_FIELD(doorbell_range, +					       GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); +	} + +	WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_2_enable_doorbell_aperture(struct amdgpu_device *adev, +					       bool enable) +{ +	u32 reg; + +	reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN); +	reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, +			    BIF_DOORBELL_APER_EN, enable ? 
1 : 0); + +	WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg); +} + +static void nbio_v7_2_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, +							bool enable) +{ +	u32 tmp = 0; + +	if (enable) { +		tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, +				    DOORBELL_SELFRING_GPA_APER_EN, 1) | +		      REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, +				    DOORBELL_SELFRING_GPA_APER_MODE, 1) | +		      REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, +				    DOORBELL_SELFRING_GPA_APER_SIZE, 0); + +		WREG32_SOC15(NBIO, 0, +			     regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, +			     lower_32_bits(adev->doorbell.base)); +		WREG32_SOC15(NBIO, 0, +			     regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, +			     upper_32_bits(adev->doorbell.base)); +	} + +	WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, +		     tmp); +} + + +static void nbio_v7_2_ih_doorbell_range(struct amdgpu_device *adev, +					bool use_doorbell, int doorbell_index) +{ +	u32 ih_doorbell_range = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE)); + +	if (use_doorbell) { +		ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, +						  GDC0_BIF_IH_DOORBELL_RANGE, OFFSET, +						  doorbell_index); +		ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, +						  GDC0_BIF_IH_DOORBELL_RANGE, SIZE, +						  2); +	} else { +		ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, +						  GDC0_BIF_IH_DOORBELL_RANGE, SIZE, +						  0); +	} + +	WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE), +			 ih_doorbell_range); +} + +static void nbio_v7_2_ih_control(struct amdgpu_device *adev) +{ +	u32 interrupt_cntl; + +	/* setup interrupt control */ +	WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, +		     adev->dummy_page_addr >> 8); + +	interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); +	/* +	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi +	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN +	 */ +	interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, +				       IH_DUMMY_RD_OVERRIDE, 0); + +	/* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ +	interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, +				       IH_REQ_NONSNOOP_EN, 0); + +	WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void nbio_v7_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, +						       bool enable) +{ +	uint32_t def, data; + +	def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL)); +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { +		data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | +			 CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | +			 CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | +			 CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | +			 CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | +			 CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); +	} else { +		data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | +			  CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | +			  CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | +			  CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | +			  CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | +			  CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); +	} + +	if (def != data) +		WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL), data); +} + +static void 
nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev, +						      bool enable) +{ +	uint32_t def, data; + +	def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { +		data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | +			 PCIE_CNTL2__MST_MEM_LS_EN_MASK | +			 PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); +	} else { +		data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | +			  PCIE_CNTL2__MST_MEM_LS_EN_MASK | +			  PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); +	} + +	if (def != data) +		WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); +} + +static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, +					    u32 *flags) +{ +	int data; + +	/* AMD_CG_SUPPORT_BIF_MGCG */ +	data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL)); +	if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) +		*flags |= AMD_CG_SUPPORT_BIF_MGCG; + +	/* AMD_CG_SUPPORT_BIF_LS */ +	data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); +	if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) +		*flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static u32 nbio_v7_2_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_2_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_2_get_pcie_index_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2); +} + +static u32 nbio_v7_2_get_pcie_data_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2); +} + +static u32 nbio_v7_2_get_pcie_port_index_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbio_v7_2_get_pcie_port_data_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = { +	.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, +	.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, +	.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, +	.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, +	.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, +	.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, +	.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, +	.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, +	.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, +	.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, +	.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, +	.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v7_2_init_registers(struct amdgpu_device *adev) +{ +	uint32_t def, data; + +	def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); +	data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); +	data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + +	if (def != data) +		WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), +				 data); +} + +const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { +	.get_hdp_flush_req_offset = nbio_v7_2_get_hdp_flush_req_offset, +	.get_hdp_flush_done_offset = nbio_v7_2_get_hdp_flush_done_offset, +	.get_pcie_index_offset = nbio_v7_2_get_pcie_index_offset, +	.get_pcie_data_offset = 
nbio_v7_2_get_pcie_data_offset, +	.get_pcie_port_index_offset = nbio_v7_2_get_pcie_port_index_offset, +	.get_pcie_port_data_offset = nbio_v7_2_get_pcie_port_data_offset, +	.get_rev_id = nbio_v7_2_get_rev_id, +	.mc_access_enable = nbio_v7_2_mc_access_enable, +	.hdp_flush = nbio_v7_2_hdp_flush, +	.get_memsize = nbio_v7_2_get_memsize, +	.sdma_doorbell_range = nbio_v7_2_sdma_doorbell_range, +	.vcn_doorbell_range = nbio_v7_2_vcn_doorbell_range, +	.enable_doorbell_aperture = nbio_v7_2_enable_doorbell_aperture, +	.enable_doorbell_selfring_aperture = nbio_v7_2_enable_doorbell_selfring_aperture, +	.ih_doorbell_range = nbio_v7_2_ih_doorbell_range, +	.update_medium_grain_clock_gating = nbio_v7_2_update_medium_grain_clock_gating, +	.update_medium_grain_light_sleep = nbio_v7_2_update_medium_grain_light_sleep, +	.get_clockgating_state = nbio_v7_2_get_clockgating_state, +	.ih_control = nbio_v7_2_ih_control, +	.init_registers = nbio_v7_2_init_registers, +	.remap_hdp_registers = nbio_v7_2_remap_hdp_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h new file mode 100644 index 000000000000..a8e8e65648a0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h @@ -0,0 +1,32 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NBIO_V7_2_H__ +#define __NBIO_V7_2_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbio_v7_2_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 03462c857498..6bee3677394a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -49,6 +49,7 @@  #include "gfxhub_v2_0.h"  #include "mmhub_v2_0.h"  #include "nbio_v2_3.h" +#include "nbio_v7_2.h"  #include "nv.h"  #include "navi10_ih.h"  #include "gfx_v10_0.h" @@ -95,6 +96,21 @@ static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg)  	return amdgpu_device_indirect_rreg64(adev, address, data, reg);  } +static u32 nv_pcie_port_rreg(struct amdgpu_device *adev, u32 reg) +{ +	unsigned long flags, address, data; +	u32 r; +	address = adev->nbio.funcs->get_pcie_port_index_offset(adev); +	data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, reg * 4); +	(void)RREG32(address); +	r = RREG32(data); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +	return r; +} +  static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)  {  	unsigned long address, data; @@ -105,6 +121,21 @@ static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)  	amdgpu_device_indirect_wreg64(adev, address, data, reg, v);  } +static void nv_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ +	unsigned long flags, address, data; + +	address = adev->nbio.funcs->get_pcie_port_index_offset(adev); +	data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	WREG32(address, reg * 4); +	(void)RREG32(address); +	WREG32(data, v); +	(void)RREG32(data); +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} +  static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg)  {  	unsigned long flags, address, data; @@ -254,7 +285,8 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num,  	*value = 0;  	for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) {  		en = &nv_allowed_read_registers[i]; -		if (reg_offset != +		if ((i == 7 && (adev->sdma.num_instances == 1)) || /* some asics don't have SDMA1 */ +		    reg_offset !=  		    (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))  			continue; @@ -330,6 +362,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		return AMD_RESET_METHOD_MODE1;  	default:  		if (smu_baco_is_support(smu)) @@ -443,6 +476,12 @@ legacy_init:  	case CHIP_NAVY_FLOUNDER:  		sienna_cichlid_reg_base_init(adev);  		break; +	case CHIP_VANGOGH: +		vangogh_reg_base_init(adev); +		break; +	case CHIP_DIMGREY_CAVEFISH: +		dimgrey_cavefish_reg_base_init(adev); +		break;  	default:  		return -EINVAL;  	} @@ -455,10 +494,11 @@ void nv_set_virt_ops(struct amdgpu_device *adev)  	adev->virt.ops = &xgpu_nv_virt_ops;  } -static bool nv_is_blockchain_sku(struct pci_dev *pdev) +static bool nv_is_headless_sku(struct pci_dev *pdev)  { -	if (pdev->device == 0x731E && -	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) +	if ((pdev->device == 0x731E && +	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || +	    (pdev->device == 0x7340 && pdev->revision == 0xC9))  		return true;  	return false;  } @@ -467,8 +507,13 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  {  	int 
r; -	adev->nbio.funcs = &nbio_v2_3_funcs; -	adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; +	if (adev->flags & AMD_IS_APU) { +		adev->nbio.funcs = &nbio_v7_2_funcs; +		adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; +	} else { +		adev->nbio.funcs = &nbio_v2_3_funcs; +		adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; +	}  	if (adev->asic_type == CHIP_SIENNA_CICHLID)  		adev->gmc.xgmi.supported = true; @@ -491,8 +536,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))  			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) -		else if (amdgpu_device_has_dc_support(adev) && -			 !nv_is_blockchain_sku(adev->pdev)) +		else if (amdgpu_device_has_dc_support(adev))  			amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif  		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); @@ -500,7 +544,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&  		    !amdgpu_sriov_vf(adev))  			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); -		if (!nv_is_blockchain_sku(adev->pdev)) +		if (!nv_is_headless_sku(adev->pdev))  			amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);  		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);  		if (adev->enable_mes) @@ -535,7 +579,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))  			amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);  		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && -		    is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) +		    is_support_sw_smu(adev))  			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);  		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))  			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -575,6 +619,44 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)  		    is_support_sw_smu(adev))  			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);  		break; +	case CHIP_VANGOGH: +		amdgpu_device_ip_block_add(adev, &nv_common_ip_block); +		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); +		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) +			amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); +		amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) +		else if (amdgpu_device_has_dc_support(adev)) +			amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif +		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); +		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); +		amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); +		break; +	case CHIP_DIMGREY_CAVEFISH: +		amdgpu_device_ip_block_add(adev, &nv_common_ip_block); +		amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); +		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) +			amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); +		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && +		    is_support_sw_smu(adev)) +			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); +		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) +		else if (amdgpu_device_has_dc_support(adev))
   else if (amdgpu_device_has_dc_support(adev)) +                        amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif +		amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); +		amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); +		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); +		amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); +		break;  	default:  		return -EINVAL;  	} @@ -671,6 +753,29 @@ static void nv_pre_asic_init(struct amdgpu_device *adev)  {  } +static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, +				       bool enter) +{ +	if (enter) +		amdgpu_gfx_rlc_enter_safe_mode(adev); +	else +		amdgpu_gfx_rlc_exit_safe_mode(adev); + +	if (adev->gfx.funcs->update_perfmon_mgcg) +		adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + +	/* +	 * The ASPM function is not fully enabled and verified on +	 * Navi yet. Temporarily skip this until ASPM enabled. +	 */ +#if 0 +	if (adev->nbio.funcs->enable_aspm) +		adev->nbio.funcs->enable_aspm(adev, !enter); +#endif + +	return 0; +} +  static const struct amdgpu_asic_funcs nv_asic_funcs =  {  	.read_disabled_bios = &nv_read_disabled_bios, @@ -691,6 +796,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =  	.get_pcie_replay_count = &nv_get_pcie_replay_count,  	.supports_baco = &nv_asic_supports_baco,  	.pre_asic_init = &nv_pre_asic_init, +	.update_umd_stable_pstate = &nv_update_umd_stable_pstate,  };  static int nv_common_early_init(void *handle) @@ -706,6 +812,8 @@ static int nv_common_early_init(void *handle)  	adev->pcie_wreg = &nv_pcie_wreg;  	adev->pcie_rreg64 = &nv_pcie_rreg64;  	adev->pcie_wreg64 = &nv_pcie_wreg64; +	adev->pciep_rreg = &nv_pcie_port_rreg; +	adev->pciep_wreg = &nv_pcie_port_wreg;  	/* TODO: will add them during VCN v2 implementation */  	adev->uvd_ctx_rreg = NULL; @@ -833,6 +941,46 @@ static int nv_common_early_init(void *handle)  		adev->external_rev_id = adev->rev_id + 0x32;  		break; +	case CHIP_VANGOGH: +		adev->apu_flags |= AMD_APU_IS_VANGOGH; +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_MGLS | +			AMD_CG_SUPPORT_GFX_CP_LS | +			AMD_CG_SUPPORT_GFX_RLC_LS | +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_GFX_CGLS | +			AMD_CG_SUPPORT_GFX_3D_CGCG | +			AMD_CG_SUPPORT_GFX_3D_CGLS | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS | +			AMD_CG_SUPPORT_GFX_FGCG | +			AMD_CG_SUPPORT_VCN_MGCG | +			AMD_CG_SUPPORT_JPEG_MGCG; +		adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | +			AMD_PG_SUPPORT_VCN | +			AMD_PG_SUPPORT_VCN_DPG | +			AMD_PG_SUPPORT_JPEG; +		if (adev->apu_flags & AMD_APU_IS_VANGOGH) +			adev->external_rev_id = adev->rev_id + 0x01; +		break; +	case CHIP_DIMGREY_CAVEFISH: +		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_GFX_3D_CGCG | +			AMD_CG_SUPPORT_VCN_MGCG | +			AMD_CG_SUPPORT_JPEG_MGCG | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS | +			AMD_CG_SUPPORT_HDP_MGCG | +			AMD_CG_SUPPORT_HDP_LS | +			AMD_CG_SUPPORT_IH_CG; +		adev->pg_flags = AMD_PG_SUPPORT_VCN | +			AMD_PG_SUPPORT_VCN_DPG | +			AMD_PG_SUPPORT_JPEG | +			AMD_PG_SUPPORT_ATHUB | +			AMD_PG_SUPPORT_MMHUB; +		adev->external_rev_id = adev->rev_id + 0x3c; +		break;  	default:  		/* FIXME: not supported yet */  		return -EINVAL; @@ -1060,6 +1208,7 @@ static int nv_common_set_clockgating_state(void *handle,  	case CHIP_NAVI12:  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		adev->nbio.funcs->update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE);  		
adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index aeef50a6a54b..515d67bf249f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -34,4 +34,6 @@ int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);  int navi12_reg_base_init(struct amdgpu_device *adev);  int sienna_cichlid_reg_base_init(struct amdgpu_device *adev); +void vangogh_reg_base_init(struct amdgpu_device *adev); +int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4137dc710aaf..d65a5339d354 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -86,21 +86,22 @@ struct psp_gfx_ctrl  /* TEE Gfx Command IDs for the ring buffer interface. */  enum psp_gfx_cmd_id  { -    GFX_CMD_ID_LOAD_TA      = 0x00000001,   /* load TA */ -    GFX_CMD_ID_UNLOAD_TA    = 0x00000002,   /* unload TA */ -    GFX_CMD_ID_INVOKE_CMD   = 0x00000003,   /* send command to TA */ -    GFX_CMD_ID_LOAD_ASD     = 0x00000004,   /* load ASD Driver */ -    GFX_CMD_ID_SETUP_TMR    = 0x00000005,   /* setup TMR region */ -    GFX_CMD_ID_LOAD_IP_FW   = 0x00000006,   /* load HW IP FW */ -    GFX_CMD_ID_DESTROY_TMR  = 0x00000007,   /* destroy TMR region */ -    GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */ -    GFX_CMD_ID_SETUP_VMR    = 0x00000009,   /* setup VMR region */ -    GFX_CMD_ID_DESTROY_VMR  = 0x0000000A,   /* destroy VMR region */ -    GFX_CMD_ID_PROG_REG     = 0x0000000B,   /* program regs */ -    GFX_CMD_ID_CLEAR_VF_FW  = 0x0000000D,   /* Clear VF FW, to be used on VF shutdown. */ +    GFX_CMD_ID_LOAD_TA            = 0x00000001,   /* load TA */ +    GFX_CMD_ID_UNLOAD_TA          = 0x00000002,   /* unload TA */ +    GFX_CMD_ID_INVOKE_CMD         = 0x00000003,   /* send command to TA */ +    GFX_CMD_ID_LOAD_ASD           = 0x00000004,   /* load ASD Driver */ +    GFX_CMD_ID_SETUP_TMR          = 0x00000005,   /* setup TMR region */ +    GFX_CMD_ID_LOAD_IP_FW         = 0x00000006,   /* load HW IP FW */ +    GFX_CMD_ID_DESTROY_TMR        = 0x00000007,   /* destroy TMR region */ +    GFX_CMD_ID_SAVE_RESTORE       = 0x00000008,   /* save/restore HW IP FW */ +    GFX_CMD_ID_SETUP_VMR          = 0x00000009,   /* setup VMR region */ +    GFX_CMD_ID_DESTROY_VMR        = 0x0000000A,   /* destroy VMR region */ +    GFX_CMD_ID_PROG_REG           = 0x0000000B,   /* program regs */ +    GFX_CMD_ID_CLEAR_VF_FW        = 0x0000000D,   /* Clear VF FW, to be used on VF shutdown. */ +    GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F,   /* Query GPUVA of the Fw Attestation DB */      /* IDs up to 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ -    GFX_CMD_ID_LOAD_TOC     = 0x00000020,   /* Load TOC and obtain TMR size */ -    GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021,   /* Indicates all graphics fw loaded, start RLC autoload */ +    GFX_CMD_ID_LOAD_TOC           = 0x00000020,   /* Load TOC and obtain TMR size */ +    GFX_CMD_ID_AUTOLOAD_RLC       = 0x00000021,   /* Indicates all graphics fw loaded, start RLC autoload */  };  /* Command to load Trusted Application binary into PSP OS. 
*/ @@ -285,6 +286,25 @@ union psp_gfx_commands      struct psp_gfx_cmd_load_toc         cmd_load_toc;  }; +struct psp_gfx_uresp_reserved +{ +    uint32_t reserved[8]; +}; + +/* Command-specific response for Fw Attestation Db */ +struct psp_gfx_uresp_fwar_db_info +{ +    uint32_t fwar_db_addr_lo; +    uint32_t fwar_db_addr_hi; +}; + +/* Union of command-specific responses for GPCOM ring. */ +union psp_gfx_uresp +{ +    struct psp_gfx_uresp_reserved reserved; +    struct psp_gfx_uresp_fwar_db_info fwar_db_info; +}; +  /* Structure of GFX Response buffer.  * For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI  * it is separate buffer. @@ -297,9 +317,11 @@ struct psp_gfx_resp      uint32_t	fw_addr_hi;	/* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */      uint32_t	tmr_size;	/* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */ -    uint32_t	reserved[3]; +    uint32_t	reserved[11]; + +    union psp_gfx_uresp uresp;      /* +64 response union containing command-specific responses */ -    /* total 32 bytes */ +    /* total 96 bytes */  };  /* Structure of Command buffer pointed by psp_gfx_rb_frame.cmd_buf_addr_hi diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 6c5d9612abcb..bd4248c93c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -59,6 +59,10 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin");  MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin");  MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin");  MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_asd.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_toc.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin");  /* address block */  #define smnMP1_FIRMWARE_FLAGS		0x3010024 @@ -77,7 +81,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  {  	struct amdgpu_device *adev = psp->adev;  	const char *chip_name; -	char fw_name[30]; +	char fw_name[PSP_FW_NAME_LEN];  	int err = 0;  	const struct ta_firmware_header_v1_0 *ta_hdr; @@ -105,24 +109,26 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  	case CHIP_NAVY_FLOUNDER:  		chip_name = "navy_flounder";  		break; +	case CHIP_VANGOGH: +		chip_name = "vangogh"; +		break; +	case CHIP_DIMGREY_CAVEFISH: +		chip_name = "dimgrey_cavefish"; +		break;  	default:  		BUG();  	} -	err = psp_init_sos_microcode(psp, chip_name); -	if (err) -		return err; - -	if (adev->asic_type != CHIP_SIENNA_CICHLID && -	    adev->asic_type != CHIP_NAVY_FLOUNDER) { -		err = psp_init_asd_microcode(psp, chip_name); -		if (err) -			return err; -	}  	switch (adev->asic_type) {  	case CHIP_VEGA20:  	case CHIP_ARCTURUS: +		err = psp_init_sos_microcode(psp, chip_name); +		if (err) +			return err; +		err = psp_init_asd_microcode(psp, chip_name); +		if (err) +			return err;  		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);  		err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);  		if (err) { @@ -150,6 +156,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)  	case CHIP_NAVI10:  	case CHIP_NAVI14:  	case CHIP_NAVI12: +		err = psp_init_sos_microcode(psp, chip_name); +		if (err) +			return err; +		err = psp_init_asd_microcode(psp, chip_name); +		if (err) +			return err;  		if (amdgpu_sriov_vf(adev))  			break;  		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); @@ -180,7 +192,19 @@ static int 
psp_v11_0_init_microcode(struct psp_context *psp)  		break;  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: -		err = psp_init_ta_microcode(&adev->psp, chip_name); +	case CHIP_DIMGREY_CAVEFISH: +		err = psp_init_sos_microcode(psp, chip_name); +		if (err) +			return err; +		err = psp_init_ta_microcode(psp, chip_name); +		if (err) +			return err; +		break; +	case CHIP_VANGOGH: +		err = psp_init_asd_microcode(psp, chip_name); +		if (err) +			return err; +		err = psp_init_toc_microcode(psp, chip_name);  		if (err)  			return err;  		break; @@ -196,7 +220,7 @@ out2:  	return err;  } -int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)  {  	struct amdgpu_device *adev = psp->adev; @@ -407,8 +431,8 @@ static int psp_v11_0_ring_init(struct psp_context *psp,  	struct amdgpu_device *adev = psp->adev;  	if ((!amdgpu_sriov_vf(adev)) && -	    (adev->asic_type != CHIP_SIENNA_CICHLID) && -	    (adev->asic_type != CHIP_NAVY_FLOUNDER)) +	    !(adev->asic_type >= CHIP_SIENNA_CICHLID && +	    adev->asic_type <= CHIP_DIMGREY_CAVEFISH))  		psp_v11_0_reroute_ih(psp);  	ring = &psp->km_ring; @@ -615,7 +639,7 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg)  static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)  {  	struct psp_memory_training_context *ctx = &psp->mem_train_ctx; -	uint32_t *pcache = (uint32_t*)ctx->sys_cache; +	uint32_t *pcache = (uint32_t *)ctx->sys_cache;  	struct amdgpu_device *adev = psp->adev;  	uint32_t p2c_header[4];  	uint32_t sz; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 75489313dbad..c4828bd3264b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -39,6 +39,8 @@  MODULE_FIRMWARE("amdgpu/renoir_asd.bin");  MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin");  /* address block */  #define smnMP1_FIRMWARE_FLAGS		0x3010024 @@ -54,7 +56,10 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)  	switch (adev->asic_type) {  	case CHIP_RENOIR: -		chip_name = "renoir"; +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			chip_name = "renoir"; +		else +			chip_name = "green_sardine";  		break;  	default:  		BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 5f304d61999e..eb5dc6c5b46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -243,7 +243,9 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)   * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine   *   * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from   * @ib: IB object to schedule + * @flags: unused   *   * Schedule an IB in the DMA ring (VI).   
*/ @@ -299,7 +301,9 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)   * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -590,6 +594,7 @@ error_free_wb:   * sdma_v2_4_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring (VI).   * Returns 0 on success, error on failure. @@ -740,6 +745,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,  /**   * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw   * + * @ring: amdgpu_ring structure holding ring information   * @ib: indirect buffer to fill with padding   *   */ @@ -789,7 +795,8 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA (VI). @@ -1188,10 +1195,11 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)  /**   * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: unused   *   * Copy GPU buffers using the DMA engine (VI).   * Used by the amdgpu ttm implementation to move pages if @@ -1216,7 +1224,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c59f6f6f4c09..ad308d8c6d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -417,7 +417,9 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)   * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine   *   * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from   * @ib: IB object to schedule + * @flags: unused   *   * Schedule an IB in the DMA ring (VI).   */ @@ -473,7 +475,9 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)   * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -862,6 +866,7 @@ error_free_wb:   * sdma_v3_0_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring (VI).   * Returns 0 on success, error on failure. 
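
Most of the sdma_v2_4/sdma_v3_0 hunks above (and the matching ones in the later sdma, si_dma, uvd and tonga_ih files) only complete the kernel-doc blocks so that every parameter in the prototype has a matching @name line, which is what scripts/kernel-doc and "make W=1" warn about. A minimal sketch of the expected shape, using the fence emit callback as the example (the function name here is made up; the parameter list follows the amdgpu_ring emit_fence callback):

/**
 * example_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address the fence value is written to
 * @seq: sequence number to write
 * @flags: fence related flags (AMDGPU_FENCE_FLAG_*)
 *
 * Each parameter needs exactly one @name line; a missing or stale one
 * (such as the old "@fence: amdgpu fence object" removed above) makes
 * kernel-doc complain.
 */
static void example_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				    u64 seq, unsigned int flags)
{
	/* body omitted - only the comment layout matters here */
}
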
@@ -1011,6 +1016,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,  /**   * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw   * + * @ring: amdgpu_ring structure holding ring information   * @ib: indirect buffer to fill with padding   *   */ @@ -1060,7 +1066,8 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA (VI). @@ -1626,10 +1633,11 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)  /**   * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: unused   *   * Copy GPU buffers using the DMA engine (VI).   * Used by the amdgpu ttm implementation to move pages if @@ -1654,7 +1662,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 86fb1eddf5a6..ce56e93c6886 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -69,6 +69,7 @@ MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");  MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");  MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin");  MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_sdma.bin");  #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L  #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -568,7 +569,7 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev)  			break;  	} -	memset((void*)adev->sdma.instance, 0, +	memset((void *)adev->sdma.instance, 0,  		sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);  } @@ -592,9 +593,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  	struct amdgpu_firmware_info *info = NULL;  	const struct common_firmware_header *header = NULL; -	if (amdgpu_sriov_vf(adev)) -		return 0; -  	DRM_DEBUG("\n");  	switch (adev->asic_type) { @@ -619,7 +617,10 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  		chip_name = "arcturus";  		break;  	case CHIP_RENOIR: -		chip_name = "renoir"; +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			chip_name = "renoir"; +		else +			chip_name = "green_sardine";  		break;  	default:  		BUG(); @@ -639,8 +640,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)  		if (adev->asic_type == CHIP_ARCTURUS) {  			/* Arcturus will leverage the same FW memory  			   for every SDMA instance */ -			memcpy((void*)&adev->sdma.instance[i], -			       (void*)&adev->sdma.instance[0], +			memcpy((void *)&adev->sdma.instance[i], +			       (void *)&adev->sdma.instance[0],  			       sizeof(struct amdgpu_sdma_instance));  		}  		else { @@ -833,7 +834,9 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)   * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine   *   
* @ring: amdgpu ring pointer + * @job: job to retrieve vmid from   * @ib: IB object to schedule + * @flags: unused   *   * Schedule an IB in the DMA ring (VEGA10).   */ @@ -908,7 +911,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)   * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -1106,7 +1111,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)  	}  } -/** +/*   * sdma_v4_0_rb_cntl - get parameters for rb_cntl   */  static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) @@ -1569,6 +1574,7 @@ error_free_wb:   * sdma_v4_0_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring (VEGA10).   * Returns 0 on success, error on failure. @@ -1665,10 +1671,9 @@ static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib,   *   * @ib: indirect buffer to fill with commands   * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe   * @count: number of page entries to update   * @incr: increase next addr by incr bytes - * @flags: access flags   *   * Update PTEs by writing them manually using sDMA (VEGA10).   */ @@ -1723,8 +1728,8 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib,  /**   * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw   * + * @ring: amdgpu_ring structure holding ring information   * @ib: indirect buffer to fill with padding - *   */  static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)  { @@ -1768,7 +1773,8 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA (VEGA10). @@ -2487,10 +2493,11 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)  /**   * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used   *   * Copy GPU buffers using the DMA engine (VEGA10/12).   
* Used by the amdgpu ttm implementation to move pages if @@ -2516,7 +2523,7 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9c72b95b7463..b208b81005bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -203,7 +203,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)  	const struct common_firmware_header *header = NULL;  	const struct sdma_firmware_header_v1_0 *hdr; -	if (amdgpu_sriov_vf(adev)) +	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_NAVI12))  		return 0;  	DRM_DEBUG("\n"); @@ -392,7 +392,9 @@ static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)   * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine   *   * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from   * @ib: IB object to schedule + * @flags: unused   *   * Schedule an IB in the DMA ring (NAVI10).   */ @@ -469,7 +471,9 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)   * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -959,6 +963,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)   * sdma_v5_0_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring (NAVI10).   * Returns 0 on success, error on failure. @@ -1061,10 +1066,9 @@ static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib,   *   * @ib: indirect buffer to fill with commands   * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe   * @count: number of page entries to update   * @incr: increase next addr by incr bytes - * @flags: access flags   *   * Update PTEs by writing them manually using sDMA (NAVI10).   */ @@ -1118,6 +1122,7 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,  /**   * sdma_v5_0_ring_pad_ib - pad the IB + * @ring: amdgpu_ring structure holding ring information   * @ib: indirect buffer to fill with padding   *   * Pad the IB with NOPs to a boundary multiple of 8. @@ -1170,7 +1175,8 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA (NAVI10). 
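
The @tmz parameter documented for the sdma_v4_0/sdma_v5_0 emit_copy_buffer hunks is more than a doc fix on the newer engines: it marks the transfer as a TMZ (secure) copy. A rough sketch of how the flag ends up in the copy packet, modeled on the sdma_v5_0 implementation (the SDMA_PKT_* macro names are taken from the navi SDMA packet headers and should be treated as assumptions here; the function name is illustrative):

static void example_emit_copy_buffer(struct amdgpu_ib *ib,
				     uint64_t src_offset,
				     uint64_t dst_offset,
				     uint32_t byte_count,
				     bool tmz)
{
	/* the TMZ bit in the COPY_LINEAR header marks the copy secure */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
	ib->ptr[ib->length_dw++] = byte_count - 1;
	ib->ptr[ib->length_dw++] = 0;	/* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

On the older engines the argument exists only to keep the amdgpu_buffer_funcs signature uniform, hence "@tmz: unused" in the sdma_v2_4/v3_0 hunks.
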
@@ -1686,10 +1692,11 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)  /**   * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used   *   * Copy GPU buffers using the DMA engine (NAVI10).   * Used by the amdgpu ttm implementation to move pages if @@ -1715,7 +1722,7 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 9f3952723c63..f1ba36a094da 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -46,6 +46,9 @@  MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin");  MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin"); + +MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");  #define SDMA1_REG_OFFSET 0x600  #define SDMA3_REG_OFFSET 0x400 @@ -87,6 +90,8 @@ static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev)  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		break;  	default:  		break; @@ -124,7 +129,7 @@ static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev)  			break;  	} -	memset((void*)adev->sdma.instance, 0, +	memset((void *)adev->sdma.instance, 0,  	       sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES);  } @@ -148,7 +153,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)  	struct amdgpu_firmware_info *info = NULL;  	const struct common_firmware_header *header = NULL; -	if (amdgpu_sriov_vf(adev)) +	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID))  		return 0;  	DRM_DEBUG("\n"); @@ -160,6 +165,12 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)  	case CHIP_NAVY_FLOUNDER:  		chip_name = "navy_flounder";  		break; +	case CHIP_VANGOGH: +		chip_name = "vangogh"; +		break; +	case CHIP_DIMGREY_CAVEFISH: +		chip_name = "dimgrey_cavefish"; +		break;  	default:  		BUG();  	} @@ -175,10 +186,10 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)  		goto out;  	for (i = 1; i < adev->sdma.num_instances; i++) { -		if (adev->asic_type == CHIP_SIENNA_CICHLID || -		    adev->asic_type == CHIP_NAVY_FLOUNDER) { -			memcpy((void*)&adev->sdma.instance[i], -			       (void*)&adev->sdma.instance[0], +		if (adev->asic_type >= CHIP_SIENNA_CICHLID && +		    adev->asic_type <= CHIP_DIMGREY_CAVEFISH) { +			memcpy((void *)&adev->sdma.instance[i], +			       (void *)&adev->sdma.instance[0],  			       sizeof(struct amdgpu_sdma_instance));  		} else {  			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); @@ -186,7 +197,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)  			if (err)  				goto out; -			err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); +			err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]);  			if (err)  				goto out;  		} @@ -345,7 +356,9 @@ static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, 
uint32_t count)   * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine   *   * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from   * @ib: IB object to schedule + * @flags: unused   *   * Schedule an IB in the DMA ring.   */ @@ -407,7 +420,9 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)   * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -696,7 +711,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)  		temp &= 0xFF0FFF;  		temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |  			 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | -			 0x01000000); +			 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);  		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);  		if (!amdgpu_sriov_vf(adev)) { @@ -795,6 +810,37 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)  	return 0;  } +static int sdma_v5_2_soft_reset(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	u32 grbm_soft_reset; +	u32 tmp; +	int i; + +	for (i = 0; i < adev->sdma.num_instances; i++) { +		grbm_soft_reset = REG_SET_FIELD(0, +						GRBM_SOFT_RESET, SOFT_RESET_SDMA0, +						1); +		grbm_soft_reset <<= i; + +		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); +		tmp |= grbm_soft_reset; +		DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); +		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); +		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + +		udelay(50); + +		tmp &= ~grbm_soft_reset; +		WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); +		tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); + +		udelay(50); +	} + +	return 0; +} +  /**   * sdma_v5_2_start - setup and start the async dma engines   * @@ -826,6 +872,7 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)  			msleep(1000);  	} +	sdma_v5_2_soft_reset(adev);  	/* unhalt the MEs */  	sdma_v5_2_enable(adev, true);  	/* enable sdma ring preemption */ @@ -905,6 +952,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)   * sdma_v5_2_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring.   * Returns 0 on success, error on failure. @@ -1006,10 +1054,9 @@ static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib,   *   * @ib: indirect buffer to fill with commands   * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe   * @count: number of page entries to update   * @incr: increase next addr by incr bytes - * @flags: access flags   *   * Update PTEs by writing them manually using sDMA.   */ @@ -1065,6 +1112,7 @@ static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib,   * sdma_v5_2_ring_pad_ib - pad the IB   *   * @ib: indirect buffer to fill with padding + * @ring: amdgpu_ring structure holding ring information   *   * Pad the IB with NOPs to a boundary multiple of 8.   */ @@ -1116,7 +1164,8 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA. 
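
Two functional changes hide among the sdma_v5_2 doc fixes above: the microcode loop now initializes &adev->sdma.instance[i] instead of re-initializing instance[0] on every pass, and sdma_v5_2_soft_reset() grows a real body that sdma_v5_2_start() runs before unhalting the MEs, so each engine is brought up from a known-clean state. The reset itself is the usual assert/read-back/delay/deassert sequence; reduced to a single instance it reads (register and macro names as in the hunk above):

	/* assert SOFT_RESET_SDMA<i> in GRBM_SOFT_RESET */
	grbm_soft_reset = REG_SET_FIELD(0, GRBM_SOFT_RESET,
					SOFT_RESET_SDMA0, 1) << i;
	tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
	tmp |= grbm_soft_reset;
	WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
	tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);	/* post the write */
	udelay(50);					/* hold in reset */

	/* deassert and let the engine settle before it is re-enabled */
	tmp &= ~grbm_soft_reset;
	WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
	tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
	udelay(50);
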
@@ -1169,8 +1218,12 @@ static int sdma_v5_2_early_init(void *handle)  		adev->sdma.num_instances = 4;  		break;  	case CHIP_NAVY_FLOUNDER: +	case CHIP_DIMGREY_CAVEFISH:  		adev->sdma.num_instances = 2;  		break; +	case CHIP_VANGOGH: +		adev->sdma.num_instances = 1; +		break;  	default:  		break;  	} @@ -1348,13 +1401,6 @@ static int sdma_v5_2_wait_for_idle(void *handle)  	return -ETIMEDOUT;  } -static int sdma_v5_2_soft_reset(void *handle) -{ -	/* todo */ - -	return 0; -} -  static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)  {  	int i, r = 0; @@ -1567,6 +1613,8 @@ static int sdma_v5_2_set_clockgating_state(void *handle,  	switch (adev->asic_type) {  	case CHIP_SIENNA_CICHLID:  	case CHIP_NAVY_FLOUNDER: +	case CHIP_VANGOGH: +	case CHIP_DIMGREY_CAVEFISH:  		sdma_v5_2_update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE ? true : false);  		sdma_v5_2_update_medium_grain_light_sleep(adev, @@ -1683,10 +1731,11 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)  /**   * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used   *   * Copy GPU buffers using the DMA engine.   * Used by the amdgpu ttm implementation to move pages if @@ -1712,7 +1761,7 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e5e336fd9e94..3cf0589bfea5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1350,7 +1350,7 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state)  static u32 si_get_xclk(struct amdgpu_device *adev)  { -        u32 reference_clock = adev->clock.spll.reference_freq; +	u32 reference_clock = adev->clock.spll.reference_freq;  	u32 tmp;  	tmp = RREG32(CG_CLKPIN_CNTL_2); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 7d2bbcbe547b..488497ad5e0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -81,7 +81,9 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,   * si_dma_ring_emit_fence - emit a fence on the DMA ring   *   * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags   *   * Add a DMA fence packet to the ring to write   * the fence seq number and DMA trap packet to generate @@ -244,6 +246,7 @@ error_free_wb:   * si_dma_ring_test_ib - test an IB on the DMA engine   *   * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT   *   * Test a simple IB in the DMA ring (VI).   * Returns 0 on success, error on failure. 
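
The checks of the form adev->asic_type >= CHIP_SIENNA_CICHLID && adev->asic_type <= CHIP_DIMGREY_CAVEFISH introduced earlier (psp_v11_0_ring_init(), sdma_v5_2_init_microcode()) replace per-chip equality tests and lean entirely on the ordering of the amd_asic_type enum: any id added between those two values is silently included. A hypothetical helper that spells the assumption out (name and placement are illustrative, not part of the patch):

/* True for the Sienna Cichlid..Dimgrey Cavefish span of amd_asic_type.
 * Relies on enum order: a chip id inserted into this range inherits
 * the behaviour automatically, which is both the intent and the risk.
 */
static inline bool asic_in_sienna_cichlid_range(enum amd_asic_type type)
{
	return type >= CHIP_SIENNA_CICHLID && type <= CHIP_DIMGREY_CAVEFISH;
}
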
@@ -401,6 +404,7 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,  /**   * si_dma_pad_ib - pad the IB to the required number of dw   * + * @ring: amdgpu_ring pointer   * @ib: indirect buffer to fill with padding   *   */ @@ -436,7 +440,8 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)   * si_dma_ring_emit_vm_flush - cik vm flush using sDMA   *   * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address   *   * Update the page table base and flush the VM TLB   * using sDMA (VI). @@ -764,10 +769,11 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)  /**   * si_dma_emit_copy_buffer - copy buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_offset: src GPU address   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation   *   * Copy GPU buffers using the DMA engine (VI).   * Used by the amdgpu ttm implementation to move pages if @@ -790,7 +796,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,  /**   * si_dma_emit_fill_buffer - fill buffer using the sDMA engine   * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to   * @src_data: value to write to buffer   * @dst_offset: dst GPU address   * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 621727d7fd18..51880f6ef634 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -43,7 +43,7 @@ static void si_ih_enable_interrupts(struct amdgpu_device *adev)  	WREG32(IH_RB_CNTL, ih_rb_cntl);  	adev->irq.ih.enabled = true;  } -   +  static void si_ih_disable_interrupts(struct amdgpu_device *adev)  {  	u32 ih_rb_cntl = RREG32(IH_RB_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 7fb240c4990c..5c7d769aee3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -212,6 +212,7 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control)  /**   * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device.   * + * @control: I2C adapter reference   * @address: The I2C address of the slave device.   * @data: The data to transmit over the bus.   * @numbytes: The amount of data to transmit. @@ -313,7 +314,9 @@ Err:  /**   * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device.   * + * @control: I2C adapter reference   * @address: The I2C address of the slave device. + * @data: Placeholder to store received data.   * @numbytes: The amount of data to transmit.   * @i2c_flag: Flags for transmission   * diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c new file mode 100644 index 000000000000..e9c474c217ec --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v11_0.h" +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +static u32 smuio_v11_0_get_rom_index_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v11_0_get_rom_data_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ +	u32 def, data; + +	/* enable/disable ROM CG is not supported on APU */ +	if (adev->flags & AMD_IS_APU) +		return; + +	def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) +		data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | +			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); +	else +		data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | +			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + +	if (def != data) +		WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ +	u32 data; + +	/* CGTT_ROM_CLK_CTRL0 is not available for APU */ +	if (adev->flags & AMD_IS_APU) +		return; + +	data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); +	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) +		*flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v11_0_funcs = { +	.get_rom_index_offset = smuio_v11_0_get_rom_index_offset, +	.get_rom_data_offset = smuio_v11_0_get_rom_data_offset, +	.update_rom_clock_gating = smuio_v11_0_update_rom_clock_gating, +	.get_clock_gating_state = smuio_v11_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h new file mode 100644 index 000000000000..43c4262f2b8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V11_0_H__ +#define __SMUIO_V11_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v11_0_funcs; + +#endif /* __SMUIO_V11_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c new file mode 100644 index 000000000000..8417890af227 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "smuio_v9_0.h" +#include "smuio/smuio_9_0_offset.h" +#include "smuio/smuio_9_0_sh_mask.h" + +static u32 smuio_v9_0_get_rom_index_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v9_0_get_rom_data_offset(struct amdgpu_device *adev) +{ +	return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ +	u32 def, data; + +	/* enable/disable ROM CG is not supported on APU */ +	if (adev->flags & AMD_IS_APU) +		return; + +	def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + +	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) +		data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | +			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); +	else +		data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | +			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + +	if (def != data) +		WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ +	u32 data; + +	/* CGTT_ROM_CLK_CTRL0 is not available for APUs */ +	if (adev->flags & AMD_IS_APU) +		return; + +	data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); +	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) +		*flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v9_0_funcs = { +	.get_rom_index_offset = smuio_v9_0_get_rom_index_offset, +	.get_rom_data_offset = smuio_v9_0_get_rom_data_offset, +	.update_rom_clock_gating = smuio_v9_0_update_rom_clock_gating, +	.get_clock_gating_state = smuio_v9_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h new file mode 100644 index 000000000000..fc265ce9837d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __SMUIO_V9_0_H__ +#define __SMUIO_V9_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v9_0_funcs; + +#endif /* __SMUIO_V9_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index afcccc6c0fc6..8a23636ecc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -42,8 +42,6 @@  #include "sdma1/sdma1_4_0_offset.h"  #include "hdp/hdp_4_0_offset.h"  #include "hdp/hdp_4_0_sh_mask.h" -#include "smuio/smuio_9_0_offset.h" -#include "smuio/smuio_9_0_sh_mask.h"  #include "nbio/nbio_7_0_default.h"  #include "nbio/nbio_7_0_offset.h"  #include "nbio/nbio_7_0_sh_mask.h" @@ -62,6 +60,7 @@  #include "nbio_v7_0.h"  #include "nbio_v7_4.h"  #include "vega10_ih.h" +#include "navi10_ih.h"  #include "sdma_v4_0.h"  #include "uvd_v7_0.h"  #include "vce_v4_0.h" @@ -70,6 +69,8 @@  #include "jpeg_v2_0.h"  #include "vcn_v2_5.h"  #include "jpeg_v2_5.h" +#include "smuio_v9_0.h" +#include "smuio_v11_0.h"  #include "dce_virtual.h"  #include "mxgpu_ai.h"  #include "amdgpu_smu.h" @@ -90,12 +91,6 @@  #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK		0x00020000L  #define mmHDP_MEM_POWER_CTRL_BASE_IDX	0 -/* for Vega20/arcturus regiter offset change */ -#define	mmROM_INDEX_VG20				0x00e4 -#define	mmROM_INDEX_VG20_BASE_IDX			0 -#define	mmROM_DATA_VG20					0x00e5 -#define	mmROM_DATA_VG20_BASE_IDX			0 -  /*   * Indirect registers accessor   */ @@ -295,17 +290,10 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev,  	dw_ptr = (u32 *)bios;  	length_dw = ALIGN(length_bytes, 4) / 4; -	switch (adev->asic_type) { -	case CHIP_VEGA20: -	case CHIP_ARCTURUS: -		rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20); -		rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20); -		break; -	default: -		rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); -		rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); -		break; -	} +	rom_index_offset = +		adev->smuio.funcs->get_rom_index_offset(adev); +	rom_data_offset = +		adev->smuio.funcs->get_rom_data_offset(adev);  	/* set rom index to 0 */  	WREG32(rom_index_offset, 0); @@ -717,6 +705,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  	else  		adev->df.funcs = &df_v1_7_funcs; +	if (adev->asic_type == CHIP_VEGA20 || +	    adev->asic_type == CHIP_ARCTURUS) +		adev->smuio.funcs = &smuio_v11_0_funcs; +	else +		adev->smuio.funcs = &smuio_v9_0_funcs; +  	adev->rev_id = soc15_get_rev_id(adev);  	switch (adev->asic_type) { @@ -734,9 +728,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  				else  					amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);  			} -			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +			if (adev->asic_type == CHIP_VEGA20) +				amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); +			else +				amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);  		} else { -			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +			if (adev->asic_type == CHIP_VEGA20) +				amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); +			else +				amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);  			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {  				if (adev->asic_type == CHIP_VEGA20)  					amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); @@ -787,9 +787,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  		if (amdgpu_sriov_vf(adev)) {  			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))  				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); -			
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +			amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);  		} else { -			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); +			amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block);  			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP))  				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);  		} @@ -822,7 +822,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)  			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);  #if defined(CONFIG_DRM_AMD_DC)                  else if (amdgpu_device_has_dc_support(adev)) -                        amdgpu_device_ip_block_add(adev, &dm_ip_block); +			amdgpu_device_ip_block_add(adev, &dm_ip_block);  #endif  		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);  		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -1169,7 +1169,6 @@ static int soc15_common_early_init(void *handle)  				AMD_CG_SUPPORT_GFX_CGLS |  				AMD_CG_SUPPORT_BIF_LS |  				AMD_CG_SUPPORT_HDP_LS | -				AMD_CG_SUPPORT_ROM_MGCG |  				AMD_CG_SUPPORT_MC_MGCG |  				AMD_CG_SUPPORT_MC_LS |  				AMD_CG_SUPPORT_SDMA_MGCG | @@ -1187,7 +1186,6 @@ static int soc15_common_early_init(void *handle)  				AMD_CG_SUPPORT_GFX_CGLS |  				AMD_CG_SUPPORT_BIF_LS |  				AMD_CG_SUPPORT_HDP_LS | -				AMD_CG_SUPPORT_ROM_MGCG |  				AMD_CG_SUPPORT_MC_MGCG |  				AMD_CG_SUPPORT_MC_LS |  				AMD_CG_SUPPORT_SDMA_MGCG | @@ -1195,8 +1193,7 @@ static int soc15_common_early_init(void *handle)  			adev->pg_flags = AMD_PG_SUPPORT_SDMA |  				AMD_PG_SUPPORT_MMHUB | -				AMD_PG_SUPPORT_VCN | -				AMD_PG_SUPPORT_VCN_DPG; +				AMD_PG_SUPPORT_VCN;  		} else {  			adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |  				AMD_CG_SUPPORT_GFX_MGLS | @@ -1212,7 +1209,6 @@ static int soc15_common_early_init(void *handle)  				AMD_CG_SUPPORT_HDP_LS |  				AMD_CG_SUPPORT_DRM_MGCG |  				AMD_CG_SUPPORT_DRM_LS | -				AMD_CG_SUPPORT_ROM_MGCG |  				AMD_CG_SUPPORT_MC_MGCG |  				AMD_CG_SUPPORT_MC_LS |  				AMD_CG_SUPPORT_SDMA_MGCG | @@ -1243,7 +1239,15 @@ static int soc15_common_early_init(void *handle)  		break;  	case CHIP_RENOIR:  		adev->asic_funcs = &soc15_asic_funcs; -		adev->apu_flags |= AMD_APU_IS_RENOIR; +		if (adev->pdev->device == 0x1636) +			adev->apu_flags |= AMD_APU_IS_RENOIR; +		else +			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; + +		if (adev->apu_flags & AMD_APU_IS_RENOIR) +			adev->external_rev_id = adev->rev_id + 0x91; +		else +			adev->external_rev_id = adev->rev_id + 0xa1;  		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |  				 AMD_CG_SUPPORT_GFX_MGLS |  				 AMD_CG_SUPPORT_GFX_3D_CGCG | @@ -1257,7 +1261,6 @@ static int soc15_common_early_init(void *handle)  				 AMD_CG_SUPPORT_SDMA_LS |  				 AMD_CG_SUPPORT_BIF_LS |  				 AMD_CG_SUPPORT_HDP_LS | -				 AMD_CG_SUPPORT_ROM_MGCG |  				 AMD_CG_SUPPORT_VCN_MGCG |  				 AMD_CG_SUPPORT_JPEG_MGCG |  				 AMD_CG_SUPPORT_IH_CG | @@ -1268,7 +1271,6 @@ static int soc15_common_early_init(void *handle)  				 AMD_PG_SUPPORT_VCN |  				 AMD_PG_SUPPORT_JPEG |  				 AMD_PG_SUPPORT_VCN_DPG; -		adev->external_rev_id = adev->rev_id + 0x91;  		break;  	default:  		/* FIXME: not supported yet */ @@ -1498,24 +1500,6 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable  		WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data);  } -static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, -						       bool enable) -{ -	uint32_t def, data; - -	def = data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); - -	if (enable && 
(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) -		data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | -			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); -	else -		data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | -			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; - -	if (def != data) -		WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); -} -  static int soc15_common_set_clockgating_state(void *handle,  					    enum amd_clockgating_state state)  { @@ -1538,7 +1522,7 @@ static int soc15_common_set_clockgating_state(void *handle,  				state == AMD_CG_STATE_GATE);  		soc15_update_drm_light_sleep(adev,  				state == AMD_CG_STATE_GATE); -		soc15_update_rom_medium_grain_clock_gating(adev, +		adev->smuio.funcs->update_rom_clock_gating(adev,  				state == AMD_CG_STATE_GATE);  		adev->df.funcs->update_medium_grain_clock_gating(adev,  				state == AMD_CG_STATE_GATE); @@ -1555,8 +1539,6 @@ static int soc15_common_set_clockgating_state(void *handle,  				state == AMD_CG_STATE_GATE);  		soc15_update_drm_light_sleep(adev,  				state == AMD_CG_STATE_GATE); -		soc15_update_rom_medium_grain_clock_gating(adev, -				state == AMD_CG_STATE_GATE);  		break;  	case CHIP_ARCTURUS:  		soc15_update_hdp_light_sleep(adev, @@ -1594,9 +1576,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags)  		*flags |= AMD_CG_SUPPORT_DRM_LS;  	/* AMD_CG_SUPPORT_ROM_MGCG */ -	data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); -	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) -		*flags |= AMD_CG_SUPPORT_ROM_MGCG; +	adev->smuio.funcs->get_clock_gating_state(adev, flags);  	adev->df.funcs->get_clockgating_state(adev, flags);  } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index e40140bf6699..ce3319993b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -181,6 +181,7 @@ static void tonga_ih_irq_disable(struct amdgpu_device *adev)   * tonga_ih_get_wptr - get the IH ring buffer wptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr   *   * Get the IH ring buffer wptr from either the register   * or the writeback memory buffer (VI).  Also check for @@ -215,6 +216,8 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev,   * tonga_ih_decode_iv - decode an interrupt vector   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into   *   * Decodes the interrupt vector at the current rptr   * position and also advance the position. @@ -247,6 +250,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,   * tonga_ih_set_rptr - set the IH ring buffer rptr   *   * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr   *   * Set the IH ring buffer rptr.   
 */
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 5288617ca552..96d7769609f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -253,7 +253,7 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
 
 static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t umc_inst        = 0;
 	uint32_t ch_inst         = 0;
@@ -368,7 +368,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 
 static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t umc_inst        = 0;
 	uint32_t ch_inst         = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index 5665c77a9d58..a064c097690c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -170,7 +170,7 @@ static void umc_v8_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
 
 static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t umc_inst        = 0;
 	uint32_t ch_inst         = 0;
@@ -260,7 +260,7 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
 
 static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	uint32_t umc_inst        = 0;
 	uint32_t ch_inst         = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index 7cf4b11a65c5..10ecae257b18 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -80,7 +80,9 @@ static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring)
  * uvd_v3_1_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job associated with the indirect buffer
  * @ib: indirect buffer to execute
+ * @flags: flags associated with the indirect buffer
  *
  * Write ring commands to execute the indirect buffer
  */
@@ -99,7 +101,9 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring,
  * uvd_v3_1_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
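The uvd_v3_1 hunks below move the firmware key-select lookup out of uvd_v3_1_fw_validate() and into sw_init, caching the value in adev->uvd.keyselect so validation no longer re-parses the firmware image every time the block is started. A minimal sketch of the lookup being relocated, with the header offset (192 + 16) and layout taken from the hunk itself; the helper name is hypothetical:

	/* Sketch: pull the key-select word out of a UVD firmware image.
	 * Per the hunk below: skip the 192+16 byte header, read the
	 * 4-byte ucode length, step past the ucode, read key-select. */
	static uint32_t uvd_fw_keyselect(const uint8_t *fw_image)
	{
		uint32_t ucode_len, keysel;
		const uint8_t *ptr = fw_image + 192 + 16;

		memcpy(&ucode_len, ptr, 4);
		ptr += ucode_len;
		memcpy(&keysel, ptr, 4);
		return keysel;
	}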
@@ -277,15 +281,8 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev)
  */
 static int uvd_v3_1_fw_validate(struct amdgpu_device *adev)
 {
-	void *ptr;
-	uint32_t ucode_len, i;
-	uint32_t keysel;
-
-	ptr = adev->uvd.inst[0].cpu_addr;
-	ptr += 192 + 16;
-	memcpy(&ucode_len, ptr, 4);
-	ptr += ucode_len;
-	memcpy(&keysel, ptr, 4);
+	int i;
+	uint32_t keysel = adev->uvd.keyselect;
 
 	WREG32(mmUVD_FW_START, keysel);
@@ -550,6 +547,8 @@ static int uvd_v3_1_sw_init(void *handle)
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
+	void *ptr;
+	uint32_t ucode_len;
 
 	/* UVD TRAP */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
@@ -571,6 +570,13 @@ static int uvd_v3_1_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* Retrieve the firmware validation key */
+	ptr = adev->uvd.inst[0].cpu_addr;
+	ptr += 192 + 16;
+	memcpy(&ucode_len, ptr, 4);
+	ptr += ucode_len;
+	memcpy(&adev->uvd.keyselect, ptr, 4);
+
 	r = amdgpu_uvd_entity_init(adev);
 
 	return r;
@@ -617,7 +623,7 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev,
 /**
  * uvd_v3_1_hw_init - start and test UVD block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Initialize the hardware, boot up the VCPU and do some testing
  */
@@ -684,7 +690,7 @@ done:
 /**
  * uvd_v3_1_hw_fini - stop the hardware block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Stop the UVD block, mark ring as not ready any more
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index b0c0c438fc93..a70d2a0de316 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -149,7 +149,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
 /**
  * uvd_v4_2_hw_init - start and test UVD block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Initialize the hardware, boot up the VCPU and do some testing
  */
@@ -204,7 +204,7 @@ done:
 /**
  * uvd_v4_2_hw_fini - stop the hardware block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Stop the UVD block, mark ring as not ready any more
  */
@@ -437,7 +437,9 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev)
  * uvd_v4_2_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
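Most of the surrounding hunks are the same mechanical fix: kernel-doc blocks are brought back in sync with the callback signatures, since scripts/kernel-doc (run in W=1 builds) warns about parameters that lack a @name: line. The expected shape, using a hypothetical callback:

	/**
	 * foo_ring_emit_ib - execute indirect buffer
	 *
	 * @ring: amdgpu_ring pointer
	 * @job: job associated with the indirect buffer
	 * @ib: indirect buffer to execute
	 * @flags: flags associated with the indirect buffer
	 *
	 * Write ring commands to execute the indirect buffer.
	 */
	static void foo_ring_emit_ib(struct amdgpu_ring *ring,
				     struct amdgpu_job *job,
				     struct amdgpu_ib *ib,
				     uint32_t flags)
	{
		/* every parameter above has a matching @name: line */
	}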
@@ -502,7 +504,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
  * uvd_v4_2_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job associated with the indirect buffer
  * @ib: indirect buffer to execute
+ * @flags: flags associated with the indirect buffer
  *
  * Write ring commands to execute the indirect buffer
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 6e57001f6d0a..f3b0a927101b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -145,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle)
 /**
  * uvd_v5_0_hw_init - start and test UVD block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Initialize the hardware, boot up the VCPU and do some testing
  */
@@ -202,7 +202,7 @@ done:
 /**
  * uvd_v5_0_hw_fini - stop the hardware block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Stop the UVD block, mark ring as not ready any more
  */
@@ -454,7 +454,9 @@ static void uvd_v5_0_stop(struct amdgpu_device *adev)
  * uvd_v5_0_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
@@ -518,7 +520,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
  * uvd_v5_0_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write ring commands to execute the indirect buffer
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 666bfa4a0b8e..760859880c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -198,9 +198,9 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 /**
  * uvd_v6_0_enc_get_create_msg - generate a UVD ENC create msg
  *
- * @adev: amdgpu_device pointer
  * @ring: ring we should submit the msg to
  * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
  * @fence: optional fence to return
  *
  * Open up a stream for HW test
@@ -261,9 +261,9 @@ err:
 /**
  * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
  *
- * @adev: amdgpu_device pointer
  * @ring: ring we should submit the msg to
  * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
  * @fence: optional fence to return
  *
  * Close up a stream for HW test or if userspace failed to do so
@@ -326,6 +326,7 @@ err:
  * uvd_v6_0_enc_ring_test_ib - test if UVD ENC IBs are working
  *
  * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  *
  */
 static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
@@ -464,7 +465,7 @@ static int uvd_v6_0_sw_fini(void *handle)
 /**
  * uvd_v6_0_hw_init - start and test UVD block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Initialize the hardware, boot up the VCPU and do some testing
  */
@@ -533,7 +534,7 @@ done:
 /**
  * uvd_v6_0_hw_fini - stop the hardware block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Stop the UVD block, mark ring as not ready any more
  */
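The @handle rewrites reflect the amd IP-block convention these files share: the common hw_init/hw_fini entry points take an opaque void *handle that is really the amdgpu_device pointer, and each implementation casts it back, as the unchanged code around these hunks does. A sketch of the idiom the comments now describe (foo_ names are placeholders):

	static int foo_hw_init(void *handle)
	{
		/* amd_ip_funcs callbacks receive the device as an opaque
		 * handle; the first step is always this cast */
		struct amdgpu_device *adev = (struct amdgpu_device *)handle;

		return foo_start(adev);	/* hypothetical bring-up helper */
	}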
@@ -891,7 +892,9 @@ static void uvd_v6_0_stop(struct amdgpu_device *adev)
  * uvd_v6_0_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
@@ -921,7 +924,9 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
  * uvd_v6_0_enc_ring_emit_fence - emit an enc fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write enc a fence and a trap command to the ring.
  */
@@ -986,7 +991,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
  * uvd_v6_0_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write ring commands to execute the indirect buffer
  */
@@ -1012,7 +1019,9 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
  * uvd_v6_0_enc_ring_emit_ib - enc execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write enc ring commands to execute the indirect buffer
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b44c8677ce8d..312ecf6d24a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -206,9 +206,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 /**
  * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg
  *
- * @adev: amdgpu_device pointer
  * @ring: ring we should submit the msg to
  * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
  * @fence: optional fence to return
  *
  * Open up a stream for HW test
@@ -269,9 +269,9 @@ err:
 /**
  * uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg
  *
- * @adev: amdgpu_device pointer
  * @ring: ring we should submit the msg to
  * @handle: session handle to use
+ * @bo: amdgpu object for which we query the offset
  * @fence: optional fence to return
  *
  * Close up a stream for HW test or if userspace failed to do so
@@ -333,6 +333,7 @@ err:
  * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working
  *
  * @ring: the engine to test on
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
  *
  */
 static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
@@ -519,7 +520,7 @@ static int uvd_v7_0_sw_fini(void *handle)
 /**
  * uvd_v7_0_hw_init - start and test UVD block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Initialize the hardware, boot up the VCPU and do some testing
  */
@@ -597,7 +598,7 @@ done:
 /**
  * uvd_v7_0_hw_fini - stop the hardware block
  *
- * @adev: amdgpu_device pointer
+ * @handle: handle used to pass amdgpu_device pointer
  *
  * Stop the UVD block, mark ring as not ready any more
  */
@@ -1147,7 +1148,9 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev)
  * uvd_v7_0_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
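The fence kernel-doc fixes replace a stale @fence parameter with what these callbacks actually receive: a GPU address, a sequence value, and flags; the engine writes seq to addr and then raises a trap interrupt. A simplified sketch with hypothetical packet opcodes:

	static void foo_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
					u64 seq, unsigned int flags)
	{
		/* write the sequence number to the fence address ... */
		amdgpu_ring_write(ring, FOO_CMD_FENCE_ADDR_LO);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, FOO_CMD_FENCE_ADDR_HI);
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, FOO_CMD_FENCE_SEQ);
		amdgpu_ring_write(ring, lower_32_bits(seq));
		/* ... then trap so the CPU sees the completion */
		amdgpu_ring_write(ring, FOO_CMD_TRAP);
	}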
@@ -1186,7 +1189,9 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
  * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write enc a fence and a trap command to the ring.
  */
@@ -1282,7 +1287,9 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
  * uvd_v7_0_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write ring commands to execute the indirect buffer
  */
@@ -1313,7 +1320,9 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
  * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write enc ring commands to execute the indirect buffer
  */
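The @job lines added to the emit_ib documentation record where the VMID comes from: it is taken from the scheduler job, and the AMDGPU_JOB_GET_VMID() helper these callbacks use (visible later in this diff) evaluates to 0 when no job is attached. A sketch mirroring the pattern; the opcode name is hypothetical:

	static void foo_enc_ring_emit_ib(struct amdgpu_ring *ring,
					 struct amdgpu_job *job,
					 struct amdgpu_ib *ib,
					 uint32_t flags)
	{
		/* a NULL job yields VMID 0 */
		unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

		amdgpu_ring_write(ring, FOO_ENC_CMD_IB);
		amdgpu_ring_write(ring, vmid);
		amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
		amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
		amdgpu_ring_write(ring, ib->length_dw);
	}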
diff --git a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
new file mode 100644
index 000000000000..d64d681a05dc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "vangogh_ip_offset.h"
+
+void vangogh_reg_base_init(struct amdgpu_device *adev)
+{
+	/* HW has more IP blocks, only initialize the blocks needed by the driver */
+	uint32_t i;
+	for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+		adev->reg_offset[GC_HWIP][i]     = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[HDP_HWIP][i]    = (uint32_t *)(&(HDP_BASE.instance[i]));
+		adev->reg_offset[MMHUB_HWIP][i]  = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+		adev->reg_offset[ATHUB_HWIP][i]  = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+		adev->reg_offset[NBIO_HWIP][i]   = (uint32_t *)(&(NBIO_BASE.instance[i]));
+		adev->reg_offset[MP0_HWIP][i]    = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[MP1_HWIP][i]    = (uint32_t *)(&(MP1_BASE.instance[i]));
+		adev->reg_offset[VCN_HWIP][i]    = (uint32_t *)(&(VCN_BASE.instance[i]));
+		adev->reg_offset[DF_HWIP][i]     = (uint32_t *)(&(DF_BASE.instance[i]));
+		adev->reg_offset[DCE_HWIP][i]    = (uint32_t *)(&(DCN_BASE.instance[i]));
+		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+		adev->reg_offset[SDMA0_HWIP][i]  = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[SMUIO_HWIP][i]  = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+		adev->reg_offset[THM_HWIP][i]    = (uint32_t *)(&(THM_BASE.instance[i]));
+	}
+}
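vangogh_reg_init.c above follows the established soc15/nv pattern: the per-IP base-address tables generated into vangogh_ip_offset.h are wired into adev->reg_offset, which the register macros then index. Roughly what a lookup such as SOC15_REG_OFFSET() reduces to once the table is populated (simplified; the helper name is hypothetical):

	static uint32_t hwip_reg_offset(struct amdgpu_device *adev, int hwip,
					int inst, int seg, uint32_t reg)
	{
		/* segment base for this IP instance plus register offset */
		return adev->reg_offset[hwip][inst][seg] + reg;
	}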
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 86e1ef732ebe..c734e31a9e65 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -431,7 +431,6 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
  * vcn_v1_0_disable_clock_gating - disable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Disable clock gating for VCN block
  */
@@ -558,7 +557,6 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
  * vcn_v1_0_enable_clock_gating - enable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Enable clock gating for VCN block
  */
@@ -1445,7 +1443,9 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
  * vcn_v1_0_dec_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
@@ -1484,7 +1484,9 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
  * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write ring commands to execute the indirect buffer
  */
@@ -1619,7 +1621,9 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
  * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write enc a fence and a trap command to the ring.
  */
@@ -1644,7 +1648,9 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
  * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write enc ring commands to execute the indirect buffer
  */
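In the vcn_v2_0 hunks below, the new @count line documents that decoder padding is emitted as a run of whole NOP packets rather than a single filler dword. A generic sketch of such padding; the packet encoding here is hypothetical, not the exact vcn_v2_0 one:

	static void foo_dec_ring_insert_nop(struct amdgpu_ring *ring,
					    uint32_t count)
	{
		uint32_t i;

		/* one two-dword NOP packet per iteration */
		for (i = 0; i < count; i++) {
			amdgpu_ring_write(ring, FOO_DEC_CMD_NO_OP);
			amdgpu_ring_write(ring, 0);
		}
	}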
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index e5d29dee0c88..d63198c945bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -45,7 +45,7 @@
 #define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d
 
 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x1e1
-#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x5a6
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x5a6
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x5a7
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x1e2
@@ -475,7 +475,6 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec
  * vcn_v2_0_disable_clock_gating - disable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Disable clock gating for VCN block
  */
@@ -636,7 +635,6 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev,
  * vcn_v2_0_enable_clock_gating - enable VCN clock gating
  *
  * @adev: amdgpu_device pointer
- * @sw: enable SW clock gating
  *
  * Enable clock gating for VCN block
  */
@@ -800,7 +798,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect)
 	WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
 
 	if (indirect)
-		adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr;
+		adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr;
 
 	/* enable clock gating */
 	vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect);
@@ -1397,6 +1395,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
  * vcn_v2_0_dec_ring_insert_nop - insert a nop command
  *
  * @ring: amdgpu_ring pointer
+ * @count: the number of NOP packets to insert
  *
  * Write a nop command to the ring.
  */
@@ -1417,7 +1416,9 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write a fence and a trap command to the ring.
  */
@@ -1454,7 +1455,9 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
  * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write ring commands to execute the indirect buffer
  */
@@ -1600,7 +1603,9 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
  * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command
  *
  * @ring: amdgpu_ring pointer
- * @fence: fence to emit
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
  *
  * Write enc a fence and a trap command to the ring.
  */
@@ -1625,7 +1630,9 @@ void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring)
  * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer
  *
  * @ring: amdgpu_ring pointer
+ * @job: job to retrieve vmid from
  * @ib: indirect buffer to execute
+ * @flags: unused
  *
  * Write enc ring commands to execute the indirect buffer
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 0f1d3ef8baa7..b6e0f4ba6272 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -45,7 +45,7 @@
 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
 
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
-#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x3b5
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
 
 #define VCN25_MAX_HW_INSTANCES_ARCTURUS			2
@@ -777,7 +777,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
 
 	if (indirect)
-		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
 
 	/* enable clock gating */
 	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index e074f7ed388c..def583916294 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -44,10 +44,11 @@
 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
 
 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
-#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x3b5
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
 
-#define VCN_INSTANCES_SIENNA_CICHLID	 				2
+#define VCN_INSTANCES_SIENNA_CICHLID				2
+#define DEC_SW_RING_ENABLED					FALSE
 
 static int amdgpu_ih_clientid_vcns[] = {
 	SOC15_IH_CLIENTID_VCN,
@@ -55,8 +56,8 @@ static int amdgpu_ih_clientid_vcns[] = {
 };
 
 static int amdgpu_ucode_id_vcns[] = {
-       AMDGPU_UCODE_ID_VCN,
-       AMDGPU_UCODE_ID_VCN1
+	AMDGPU_UCODE_ID_VCN,
+	AMDGPU_UCODE_ID_VCN1
 };
 
 static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
@@ -155,6 +156,13 @@ static int vcn_v3_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/*
+	 * Note: doorbell assignment is fixed for SRIOV multiple VCN engines
+	 * Formula:
+	 *   vcn_db_base  = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
+	 *   dec_ring_i   = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
+	 *   enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
+	 */
 	if (amdgpu_sriov_vf(adev)) {
 		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
 		/* get DWORD offset */
@@ -162,6 +170,7 @@ static int vcn_v3_0_sw_init(void *handle)
 	}
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+		volatile struct amdgpu_fw_shared *fw_shared;
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
@@ -192,9 +201,7 @@ static int vcn_v3_0_sw_init(void *handle)
 		ring = &adev->vcn.inst[i].ring_dec;
 		ring->use_doorbell = true;
 		if (amdgpu_sriov_vf(adev)) {
-			ring->doorbell_index = vcn_doorbell_index;
-			/* NOTE: increment so next VCN engine use next DOORBELL DWORD */
-			vcn_doorbell_index++;
+			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1);
 		} else {
 			ring->doorbell_index =
(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;  		} @@ -216,9 +223,7 @@ static int vcn_v3_0_sw_init(void *handle)  			ring = &adev->vcn.inst[i].ring_enc[j];  			ring->use_doorbell = true;  			if (amdgpu_sriov_vf(adev)) { -				ring->doorbell_index = vcn_doorbell_index; -				/* NOTE: increment so next VCN engine use next DOORBELL DWORD */ -				vcn_doorbell_index++; +				ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j;  			} else {  				ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;  			} @@ -230,6 +235,11 @@ static int vcn_v3_0_sw_init(void *handle)  			if (r)  				return r;  		} + +		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; +		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) | +					     cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); +		fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);  	}  	if (amdgpu_sriov_vf(adev)) { @@ -253,7 +263,17 @@ static int vcn_v3_0_sw_init(void *handle)  static int vcn_v3_0_sw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	int r; +	int i, r; + +	for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +		volatile struct amdgpu_fw_shared *fw_shared; + +		if (adev->vcn.harvest_config & (1 << i)) +			continue; +		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; +		fw_shared->present_flag_0 = 0; +		fw_shared->sw_ring.is_enabled = false; +	}  	if (amdgpu_sriov_vf(adev))  		amdgpu_virt_free_mm_table(adev); @@ -291,17 +311,19 @@ static int vcn_v3_0_hw_init(void *handle)  				continue;  			ring = &adev->vcn.inst[i].ring_dec; -			ring->wptr = 0; -			ring->wptr_old = 0; -			vcn_v3_0_dec_ring_set_wptr(ring); -			ring->sched.ready = true; +			if (ring->sched.ready) { +				ring->wptr = 0; +				ring->wptr_old = 0; +				vcn_v3_0_dec_ring_set_wptr(ring); +			}  			for (j = 0; j < adev->vcn.num_enc_rings; ++j) {  				ring = &adev->vcn.inst[i].ring_enc[j]; -				ring->wptr = 0; -				ring->wptr_old = 0; -				vcn_v3_0_enc_ring_set_wptr(ring); -				ring->sched.ready = true; +				if (ring->sched.ready) { +					ring->wptr = 0; +					ring->wptr_old = 0; +					vcn_v3_0_enc_ring_set_wptr(ring); +				}  			}  		}  	} else { @@ -461,6 +483,15 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst)  		upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));  	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);  	WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + +	/* non-cache window */ +	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, +		lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); +	WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, +		upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); +	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0); +	WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0, +		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));  }  static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) @@ -543,13 +574,16 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx  	/* non-cache window */  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( -			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); +			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), +			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( -			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 
0, 0, indirect); +			VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), +			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(  			VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( -			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); +			VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), +			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);  }  static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -902,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)  static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)  { +	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;  	struct amdgpu_ring *ring;  	uint32_t rb_bufsz, tmp; @@ -915,7 +950,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo  	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);  	if (indirect) -		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; +		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;  	/* enable clock gating */  	vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); @@ -1011,6 +1046,12 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo  	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);  	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); +	/* Stall DPG before WPTR/RPTR reset */ +	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), +		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, +		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); +	fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); +  	/* set the write pointer delay */  	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -1033,11 +1074,17 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo  	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,  		lower_32_bits(ring->wptr)); +	fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); +	/* Unstall DPG */ +	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), +		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); +  	return 0;  }  static int vcn_v3_0_start(struct amdgpu_device *adev)  { +	volatile struct amdgpu_fw_shared *fw_shared;  	struct amdgpu_ring *ring;  	uint32_t rb_bufsz, tmp;  	int i, j, k, r; @@ -1180,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)  		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);  		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); +		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; +		fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); +  		/* programm the RB_BASE for ring buffer */  		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,  			lower_32_bits(ring->gpu_addr)); @@ -1192,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)  		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);  		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,  			lower_32_bits(ring->wptr)); +		fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + +		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);  		ring = &adev->vcn.inst[i].ring_enc[0];  		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, 
lower_32_bits(ring->wptr));  		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));  		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);  		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); +		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); +		fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);  		ring = &adev->vcn.inst[i].ring_enc[1];  		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));  		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));  		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);  		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); +		fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);  	}  	return 0; @@ -1227,12 +1283,12 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)  	uint32_t table_size;  	uint32_t size, size_dw; +	bool is_vcn_ready; +  	struct mmsch_v3_0_cmd_direct_write  		direct_wt = { {0} };  	struct mmsch_v3_0_cmd_direct_read_modify_write  		direct_rd_mod_wt = { {0} }; -	struct mmsch_v3_0_cmd_direct_polling -		direct_poll = { {0} };  	struct mmsch_v3_0_cmd_end end = { {0} };  	struct mmsch_v3_0_init_header header; @@ -1240,8 +1296,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)  		MMSCH_COMMAND__DIRECT_REG_WRITE;  	direct_rd_mod_wt.cmd_header.command_type =  		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; -	direct_poll.cmd_header.command_type = -		MMSCH_COMMAND__DIRECT_REG_POLLING;  	end.cmd_header.command_type =  		MMSCH_COMMAND__END; @@ -1364,14 +1418,14 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)  		MMSCH_V3_0_INSERT_END();  		/* refine header */ -		header.inst[i].init_status = 1; +		header.inst[i].init_status = 0;  		header.inst[i].table_offset = header.total_size;  		header.inst[i].table_size = table_size;  		header.total_size += table_size;  	}  	/* Update init table header in memory */ -        size = sizeof(struct mmsch_v3_0_init_header); +	size = sizeof(struct mmsch_v3_0_init_header);  	table_loc = (uint32_t *)table->cpu_addr;  	memcpy((void *)table_loc, &header, size); @@ -1422,6 +1476,30 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)  		}  	} +	/* 6, check each VCN's init_status +	 * if it remains as 0, then this VCN is not assigned to current VF +	 * do not start ring for this VCN +	 */ +	size = sizeof(struct mmsch_v3_0_init_header); +	table_loc = (uint32_t *)table->cpu_addr; +	memcpy(&header, (void *)table_loc, size); + +	for (i = 0; i < adev->vcn.num_vcn_inst; i++) { +		if (adev->vcn.harvest_config & (1 << i)) +			continue; + +		is_vcn_ready = (header.inst[i].init_status == 1); +		if (!is_vcn_ready) +			DRM_INFO("VCN(%d) engine is disabled by hypervisor\n", i); + +		ring = &adev->vcn.inst[i].ring_dec; +		ring->sched.ready = is_vcn_ready; +		for (j = 0; j < adev->vcn.num_enc_rings; ++j) { +			ring = &adev->vcn.inst[i].ring_enc[j]; +			ring->sched.ready = is_vcn_ready; +		} +	} +  	return 0;  } @@ -1531,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev)  static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,  		   int inst_idx, struct dpg_pause_state *new_state)  { +	volatile struct amdgpu_fw_shared *fw_shared;  	struct amdgpu_ring *ring;  	uint32_t reg_data = 0;  	int ret_code; @@ -1556,23 +1635,36 @@ static int 
vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,  					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,  					UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); +				/* Stall DPG before WPTR/RPTR reset */ +				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), +					UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, +					~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); +  				/* Restore */ +				fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; +				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);  				ring = &adev->vcn.inst[inst_idx].ring_enc[0]; +				ring->wptr = 0;  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); +				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); +				fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);  				ring = &adev->vcn.inst[inst_idx].ring_enc[1]; +				ring->wptr = 0;  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));  				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); +				fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); -				WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, -					RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); +				/* Unstall DPG */ +				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), +					0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);  				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,  					UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1630,10 +1722,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) -		WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, -			lower_32_bits(ring->wptr) | 0x80000000); -  	if (ring->use_doorbell) {  		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);  		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1642,6 +1730,98 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)  	}  } +static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, +				u64 seq, uint32_t flags) +{ +	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + +	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE); +	amdgpu_ring_write(ring, addr); +	amdgpu_ring_write(ring, upper_32_bits(addr)); +	amdgpu_ring_write(ring, seq); +	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP); +} + +static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring) +{ +	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); +} + +static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, +			       struct amdgpu_job *job, +			       struct amdgpu_ib *ib, +			       uint32_t flags) +{ +	uint32_t vmid = AMDGPU_JOB_GET_VMID(job); + +	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB); +	amdgpu_ring_write(ring, vmid); +	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); +	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); +	
amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+				uint32_t val, uint32_t mask)
+{
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring,
+				uint32_t vmid, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t data0, data1, mask;
+
+	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+	/* wait for register write */
+	data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
+{
+	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE);
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, val);
+}
+
+static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_DEC,
+	.align_mask = 0x3f,
+	.nop = VCN_DEC_SW_CMD_NO_OP,
+	.vmhub = AMDGPU_MMHUB_0,
+	.get_rptr = vcn_v3_0_dec_ring_get_rptr,
+	.get_wptr = vcn_v3_0_dec_ring_get_wptr,
+	.set_wptr = vcn_v3_0_dec_ring_set_wptr,
+	.emit_frame_size =
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+		4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */
+		5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fdec_swe x2 vm fdec_swe */
+		1, /* vcn_v3_0_dec_sw_ring_insert_end */
+	.emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */
+	.emit_ib = vcn_v3_0_dec_sw_ring_emit_ib,
+	.emit_fence = vcn_v3_0_dec_sw_ring_emit_fence,
+	.emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush,
+	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
+	.test_ib = NULL, /* amdgpu_vcn_dec_sw_ring_test_ib */
+	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_end = vcn_v3_0_dec_sw_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vcn_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+	.emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg,
+	.emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
 static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_DEC,
 	.align_mask = 0xf,
@@ -1779,9 +1959,13 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
-		adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+		if (!DEC_SW_RING_ENABLED)
+			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
+		else
+			adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
 		adev->vcn.inst[i].ring_dec.me = i;
-		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
+		DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i,
+			  DEC_SW_RING_ENABLED ? "(Software Ring)" : "");
 	}
 }
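The vcn_v3_0 changes above keep both decode-ring backends behind one compile-time switch: DEC_SW_RING_ENABLED is FALSE here, so the register-backed ring funcs stay selected, while the fw_shared flags set in sw_init tell the firmware whether the software ring is active. Because all ring behaviour is routed through the amdgpu_ring_funcs vtable, switching backends reduces to a pointer swap; a condensed sketch of the selection in the hunk above:

	/* Sketch: per-instance backend selection; with DEC_SW_RING_ENABLED
	 * fixed at FALSE the legacy decode ring is always chosen. */
	adev->vcn.inst[i].ring_dec.funcs = DEC_SW_RING_ENABLED ?
		&vcn_v3_0_dec_sw_ring_vm_funcs :
		&vcn_v3_0_dec_ring_vm_funcs;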
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 407c6093c2ec..e5ae31eb744e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -91,6 +91,9 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 		}
 		adev->irq.ih2.enabled = true;
 	}
+
+	if (adev->irq.ih_soft.ring_size)
+		adev->irq.ih_soft.enabled = true;
 }
 
 /**
@@ -366,6 +369,7 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev)
  * vega10_ih_get_wptr - get the IH ring buffer wptr
  *
  * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to fetch wptr
  *
  * Get the IH ring buffer wptr from either the register
  * or the writeback memory buffer (VEGA10).  Also check for
@@ -430,6 +434,8 @@ out:
  * vega10_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to decode
+ * @entry: IV entry to place decoded information into
  *
  * Decodes the interrupt vector at the current rptr
  * position and also advance the position.
@@ -473,6 +479,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
  * vega10_ih_irq_rearm - rearm IRQ if lost
  *
  * @adev: amdgpu_device pointer
+ * @ih: IH ring to match
  *
  */
 static void vega10_ih_irq_rearm(struct amdgpu_device *adev,
@@ -505,6 +512,7 @@ static void vega10_ih_irq_rearm(struct amdgpu_device *adev,
  * vega10_ih_set_rptr - set the IH ring buffer rptr
  *
  * @adev: amdgpu_device pointer
+ * @ih: IH ring buffer to set rptr
  *
  * Set the IH ring buffer rptr.
  */
@@ -606,6 +614,10 @@ static int vega10_ih_sw_init(void *handle)
 	adev->irq.ih2.use_doorbell = true;
 	adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1;
 
+	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
+	if (r)
+		return r;
+
 	r = amdgpu_irq_init(adev);
 
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 9bcd0eebc6d7..d56b474b3a21 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1645,6 +1645,7 @@ static int vi_common_set_clockgating_state(void *handle,
 	case CHIP_POLARIS12:
 	case CHIP_VEGAM:
 		vi_common_set_clockgating_state_by_smu(adev, state);
+		break;
 	default:
 		break;
 	}
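The final vi.c hunk is a plain missing-break fix: the CHIP_POLARIS12/CHIP_VEGAM case previously fell through into default after calling vi_common_set_clockgating_state_by_smu(). Harmless in practice, since default only breaks, but it is exactly the pattern -Wimplicit-fallthrough flags:

	switch (adev->asic_type) {
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		vi_common_set_clockgating_state_by_smu(adev, state);
		break;	/* without this, control falls into default */
	default:
		break;
	}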
