Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_device.c	687
1 file changed, 422 insertions(+), 265 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6447cd6ca5a8..b4ad1c055c70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -65,6 +65,7 @@
 #include "amdgpu_ras.h"
 #include "amdgpu_pmu.h"
 #include "amdgpu_fru_eeprom.h"
+#include "amdgpu_reset.h"
 
 #include <linux/suspend.h>
 #include <drm/task_barrier.h>
@@ -110,6 +111,7 @@ const char *amdgpu_asic_name[] = {
 	"RAVEN",
 	"ARCTURUS",
 	"RENOIR",
+	"ALDEBARAN",
 	"NAVI10",
 	"NAVI14",
 	"NAVI12",
@@ -136,7 +138,7 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 	struct amdgpu_device *adev = drm_to_adev(ddev);
 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+	return sysfs_emit(buf, "%llu\n", cnt);
 }
 
 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
@@ -160,7 +162,7 @@ static ssize_t amdgpu_device_get_product_name(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_name);
+	return sysfs_emit(buf, "%s\n", adev->product_name);
 }
 
 static DEVICE_ATTR(product_name, S_IRUGO,
@@ -182,7 +184,7 @@ static ssize_t amdgpu_device_get_product_number(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", adev->product_number);
+	return sysfs_emit(buf, "%s\n", adev->product_number);
 }
 
 static DEVICE_ATTR(product_number, S_IRUGO,
@@ -204,25 +206,25 @@ static ssize_t amdgpu_device_get_serial_number(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(ddev);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", adev->serial);
+	return sysfs_emit(buf, "%s\n", adev->serial);
 }
 
 static DEVICE_ATTR(serial_number, S_IRUGO,
 		amdgpu_device_get_serial_number, NULL);
 
 /**
- * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
+ * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
  *
  * @dev: drm_device pointer
  *
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ATPX power control,
  * otherwise return false.
  */
-bool amdgpu_device_supports_atpx(struct drm_device *dev)
+bool amdgpu_device_supports_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = drm_to_adev(dev);
 
-	if (adev->flags & AMD_IS_PX)
+	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
 		return true;
 	return false;
 }
@@ -232,14 +234,15 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev)
  *
  * @dev: drm_device pointer
  *
- * Returns true if the device is a dGPU with HG/PX power control,
+ * Returns true if the device is a dGPU with ACPI power control,
  * otherwise return false.
  */
 bool amdgpu_device_supports_boco(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = drm_to_adev(dev);
 
-	if (adev->has_pr3)
+	if (adev->has_pr3 ||
+	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
 		return true;
 	return false;
 }
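The snprintf()-to-sysfs_emit() conversions above follow current kernel guidance for sysfs show() callbacks: sysfs_emit() knows the buffer is a full PAGE_SIZE page and refuses to write past it. A minimal sketch of the pattern, using a hypothetical attribute that is not part of this patch:

#include <linux/device.h>
#include <linux/sysfs.h>

/* hypothetical read-only attribute demonstrating the sysfs_emit() pattern */
static ssize_t example_count_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	/* sysfs_emit() checks buf is page-aligned and caps output at PAGE_SIZE */
	return sysfs_emit(buf, "%llu\n", 42ULL);
}
static DEVICE_ATTR_RO(example_count);
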
@@ -325,6 +328,35 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 
 /*
  * register access helper functions.
 */
+
+/* Check if hw access should be skipped because of hotplug or device error */
+bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
+{
+	if (adev->in_pci_err_recovery)
+		return true;
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * This is a bit complicated to understand, so worth a comment. What we assert
+	 * here is that the GPU reset is not running on another thread in parallel.
+	 *
+	 * For this we trylock the read side of the reset semaphore, if that succeeds
+	 * we know that the reset is not running in parallel.
+	 *
+	 * If the trylock fails we assert that we are either already holding the read
+	 * side of the lock or are the reset thread itself and hold the write side of
+	 * the lock.
+	 */
+	if (in_task()) {
+		if (down_read_trylock(&adev->reset_sem))
+			up_read(&adev->reset_sem);
+		else
+			lockdep_assert_held(&adev->reset_sem);
+	}
+#endif
+	return false;
+}
+
 /**
  * amdgpu_device_rreg - read a memory mapped IO or indirect register
  *
@@ -339,7 +371,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 {
 	uint32_t ret;
 
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if ((reg * 4) < adev->rmmio_size) {
@@ -376,7 +408,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
  */
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (offset < adev->rmmio_size)
@@ -401,7 +433,7 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
  */
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (offset < adev->rmmio_size)
@@ -424,7 +456,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
 			uint32_t reg, uint32_t v,
 			uint32_t acc_flags)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if ((reg * 4) < adev->rmmio_size) {
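The new amdgpu_device_skip_hw_access() helper above uses a read-side trylock on the reset rw_semaphore purely as a lockdep annotation: if the read side can be taken, no reset is in flight; if not, the caller must already hold the semaphore. The same pattern in isolation, with simplified names (illustrative, not the driver's code):

#include <linux/rwsem.h>
#include <linux/lockdep.h>
#include <linux/preempt.h>

static DECLARE_RWSEM(reset_sem);	/* write-held for the duration of a reset */

static void assert_no_concurrent_reset(void)
{
#ifdef CONFIG_LOCKDEP
	if (in_task()) {
		if (down_read_trylock(&reset_sem))
			up_read(&reset_sem);	/* no reset running in parallel */
		else
			lockdep_assert_held(&reset_sem); /* we must be the reset path */
	}
#endif
}
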
@@ -451,63 +483,20 @@
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 			     uint32_t reg, uint32_t v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (amdgpu_sriov_fullaccess(adev) &&
 	    adev->gfx.rlc.funcs &&
 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
-			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
+			return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v, 0);
 	} else {
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	}
 }
 
 /**
- * amdgpu_io_rreg - read an IO register
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- *
- * Returns the 32 bit value from the offset specified.
- */
-u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
-{
-	if (adev->in_pci_err_recovery)
-		return 0;
-
-	if ((reg * 4) < adev->rio_mem_size)
-		return ioread32(adev->rio_mem + (reg * 4));
-	else {
-		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
-		return ioread32(adev->rio_mem + (mmMM_DATA * 4));
-	}
-}
-
-/**
- * amdgpu_io_wreg - write to an IO register
- *
- * @adev: amdgpu_device pointer
- * @reg: dword aligned register offset
- * @v: 32 bit value to write to the register
- *
- * Writes the value specified to the offset specified.
- */
-void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
-{
-	if (adev->in_pci_err_recovery)
-		return;
-
-	if ((reg * 4) < adev->rio_mem_size)
-		iowrite32(v, adev->rio_mem + (reg * 4));
-	else {
-		iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4));
-		iowrite32(v, adev->rio_mem + (mmMM_DATA * 4));
-	}
-}
-
-/**
  * amdgpu_mm_rdoorbell - read a doorbell dword
  *
  * @adev: amdgpu_device pointer
@@ -518,7 +507,7 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
  */
 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -541,7 +530,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
  */
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -562,7 +551,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
  */
 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -585,7 +574,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
  */
 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -1223,6 +1212,10 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
 		}
 	}
 
+	/* Don't post if we need to reset whole hive on init */
+	if (adev->gmc.xgmi.pending_reset)
+		return false;
+
 	if (adev->has_hw_reset) {
 		adev->has_hw_reset = false;
 		return true;
@@ -1429,7 +1422,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	int r;
 
-	if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
+	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
 		return;
 
 	if (state == VGA_SWITCHEROO_ON) {
@@ -1810,6 +1803,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_VEGA20:
+	case CHIP_ALDEBARAN:
 	case CHIP_SIENNA_CICHLID:
 	case CHIP_NAVY_FLOUNDER:
 	case CHIP_DIMGREY_CAVEFISH:
@@ -2010,6 +2004,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	case CHIP_RAVEN:
 	case CHIP_ARCTURUS:
 	case CHIP_RENOIR:
+	case CHIP_ALDEBARAN:
 		if (adev->flags & AMD_IS_APU)
 			adev->family = AMDGPU_FAMILY_RV;
 		else
@@ -2045,6 +2040,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
+		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -2083,6 +2080,11 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
 				return r;
 			}
+
+			/* get pf2vf msg info at its earliest time */
+			if (amdgpu_sriov_vf(adev))
+				amdgpu_virt_init_data_exchange(adev);
+
 		}
 	}
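The switcheroo hunk above only swaps in the renamed helper; for orientation, a driver registers with vga_switcheroo through a small ops table, roughly as follows (hedged sketch of the generic API, not amdgpu's full implementation):

#include <linux/pci.h>
#include <linux/vga_switcheroo.h>

static void demo_switcheroo_set_state(struct pci_dev *pdev,
				      enum vga_switcheroo_state state)
{
	/* power the GPU up or down; amdgpu additionally ignores OFF for PX */
}

static bool demo_switcheroo_can_switch(struct pci_dev *pdev)
{
	return true;	/* e.g. check that the device is not in use */
}

static const struct vga_switcheroo_client_ops demo_switcheroo_ops = {
	.set_gpu_state = demo_switcheroo_set_state,
	.can_switch = demo_switcheroo_can_switch,
};

/* at init time, for a PX-capable dGPU:
 *	vga_switcheroo_register_client(pdev, &demo_switcheroo_ops, true);
 */
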
@@ -2149,6 +2151,9 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
 				continue;
 
+			if (!adev->ip_blocks[i].status.sw)
+				continue;
+
 			/* no need to do the fw loading again if already done*/
 			if (adev->ip_blocks[i].status.hw == true)
 				break;
@@ -2289,7 +2294,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
 		amdgpu_xgmi_add_device(adev);
-	amdgpu_amdkfd_device_init(adev);
+
+	/* Don't init kfd if whole hive needs to be reset during init */
+	if (!adev->gmc.xgmi.pending_reset)
+		amdgpu_amdkfd_device_init(adev);
 
 	amdgpu_fru_get_product_info(adev);
@@ -2359,8 +2367,8 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
  * Returns 0 on success, negative error code on failure.
  */
-static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
-						enum amd_clockgating_state state)
+int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+			       enum amd_clockgating_state state)
 {
 	int i, j, r;
 
@@ -2371,6 +2379,10 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
 		if (!adev->ip_blocks[i].status.late_initialized)
 			continue;
+		/* skip CG for GFX on S0ix */
+		if (adev->in_s0ix &&
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2391,7 +2403,8 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state)
+int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+			       enum amd_powergating_state state)
 {
 	int i, j, r;
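Both amdgpu_device_set_cg_state() and amdgpu_device_set_pg_state() walk the IP-block array in a direction chosen by the target state: gating runs front-to-back, ungating back-to-front so dependencies are torn down in reverse. The index expression is easy to misread, so here it is in isolation (illustrative C, simplified names):

enum demo_state { DEMO_GATE, DEMO_UNGATE };

static void demo_walk_blocks(int n, enum demo_state state,
			     void (*apply)(int idx))
{
	int i, j;

	for (j = 0; j < n; j++) {
		/* gate in order 0..n-1, ungate in order n-1..0 */
		i = (state == DEMO_GATE) ? j : n - j - 1;
		apply(i);
	}
}
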
@@ -2402,6 +2415,10 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
 		if (!adev->ip_blocks[i].status.late_initialized)
 			continue;
+		/* skip PG for GFX on S0ix */
+		if (adev->in_s0ix &&
+		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2498,6 +2515,11 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	if (r)
 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+	/* For XGMI + passthrough configuration on arcturus, enable light SBR */
+	if (adev->asic_type == CHIP_ARCTURUS &&
+	    amdgpu_passthrough(adev) &&
+	    adev->gmc.xgmi.num_physical_nodes > 1)
+		smu_set_light_sbr(&adev->smu, true);
 
 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
 		mutex_lock(&mgpu_info.mutex);
@@ -2678,11 +2700,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	if (adev->in_poweroff_reboot_com ||
-	    !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
-		amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
-		amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-	}
+	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.valid)
@@ -2722,6 +2741,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	if (adev->in_s0ix)
+		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+
 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
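The hunk above makes suspend phase2 notify the SMU of D3 entry when the system is headed into S0ix; the matching D0-entry call sits in amdgpu_device_resume() further down. Condensed from the hunks of this patch, the pairing is:

/* suspend side, in amdgpu_device_ip_suspend_phase2() */
if (adev->in_s0ix)
	amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);

/* resume side, in amdgpu_device_resume() */
if (adev->in_s0ix)
	amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
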
+
+		/* skip unnecessary suspend if we have not initialized them yet */
+		if (adev->gmc.xgmi.pending_reset &&
+		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
+		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
+			adev->ip_blocks[i].status.hw = false;
+			continue;
+		}
+
+		/* skip suspend of gfx and psp for S0ix
+		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
+		 * like at runtime. PSP is also part of the always on hardware
+		 * so no need to suspend it.
+		 */
+		if (adev->in_s0ix &&
+		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
+		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+			continue;
+
 		/* XXX handle errors */
 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
 		/* XXX handle errors */
@@ -2753,7 +2796,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 				}
 			}
 		}
-		adev->ip_blocks[i].status.hw = false;
 	}
 
 	return 0;
@@ -2774,8 +2816,10 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (amdgpu_sriov_vf(adev))
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_fini_data_exchange(adev);
 		amdgpu_virt_request_full_gpu(adev, false);
+	}
 
 	r = amdgpu_device_ip_suspend_phase1(adev);
 	if (r)
@@ -3098,8 +3142,9 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 		if (adev->asic_reset_res)
 			goto fail;
 
-		if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
-			adev->mmhub.funcs->reset_ras_error_count(adev);
+		if (adev->mmhub.ras_funcs &&
+		    adev->mmhub.ras_funcs->reset_ras_error_count)
+			adev->mmhub.ras_funcs->reset_ras_error_count(adev);
 	} else {
 
 		task_barrier_full(&hive->tb);
@@ -3209,7 +3254,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	struct drm_device *ddev = adev_to_drm(adev);
 	struct pci_dev *pdev = adev->pdev;
 	int r, i;
-	bool atpx = false;
+	bool px = false;
 	u32 max_MBps;
 
 	adev->shutdown = false;
@@ -3257,7 +3302,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	atomic_set(&adev->irq.ih.lock, 0);
 	mutex_init(&adev->firmware.mutex);
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
@@ -3290,6 +3334,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
 
+	INIT_LIST_HEAD(&adev->reset_list);
+
 	INIT_DELAYED_WORK(&adev->delayed_init_work,
 			  amdgpu_device_delayed_init_work_handler);
 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
@@ -3328,17 +3374,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
-	/* io port mapping */
-	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-		if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) {
-			adev->rio_mem_size = pci_resource_len(adev->pdev, i);
-			adev->rio_mem = pci_iomap(adev->pdev, i, adev->rio_mem_size);
-			break;
-		}
-	}
-	if (adev->rio_mem == NULL)
-		DRM_INFO("PCI I/O BAR is not found.\n");
-
 	/* enable PCIE atomic ops */
 	r = pci_enable_atomic_ops_to_root(adev->pdev,
 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
@@ -3381,16 +3416,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
 		vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
 
-	if (amdgpu_device_supports_atpx(ddev))
-		atpx = true;
-	if (amdgpu_has_atpx() &&
-	    (amdgpu_is_atpx_hybrid() ||
-	     amdgpu_has_atpx_dgpu_power_cntl()) &&
-	    !pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_device_supports_px(ddev)) {
+		px = true;
 		vga_switcheroo_register_client(adev->pdev,
-					       &amdgpu_switcheroo_ops, atpx);
-	if (atpx)
+					       &amdgpu_switcheroo_ops, px);
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+	}
 
 	if (amdgpu_emu_mode == 1) {
 		/* post the asic on emulation mode */
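With the legacy PCI I/O-port path removed above, register access goes through MMIO only; the surviving context also enables PCIe atomics. pci_enable_atomic_ops_to_root() takes a mask of the completer capabilities the device requires and fails if any switch or root port on the path lacks them. Sketch of typical usage (illustrative error handling, not amdgpu's exact code):

#include <linux/pci.h>

static int demo_enable_pcie_atomics(struct pci_dev *pdev)
{
	int r;

	/* request 32- and 64-bit atomic completion support on the path to root */
	r = pci_enable_atomic_ops_to_root(pdev,
					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
	if (r)
		dev_info(&pdev->dev, "PCIe atomics not supported (%d)\n", r);

	return r;
}
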
@@ -3398,6 +3429,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		goto fence_driver_init;
 	}
 
+	amdgpu_reset_init(adev);
+
 	/* detect if we are with an SRIOV vbios */
 	amdgpu_device_detect_sriov_bios(adev);
 
@@ -3405,10 +3438,28 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	 *  E.g., driver was not cleanly unloaded previously, etc.
 	 */
 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
-		r = amdgpu_asic_reset(adev);
-		if (r) {
-			dev_err(adev->dev, "asic reset on init failed\n");
-			goto failed;
+		if (adev->gmc.xgmi.num_physical_nodes) {
+			dev_info(adev->dev, "Pending hive reset.\n");
+			adev->gmc.xgmi.pending_reset = true;
+			/* Only need to init necessary block for SMU to handle the reset */
+			for (i = 0; i < adev->num_ip_blocks; i++) {
+				if (!adev->ip_blocks[i].status.valid)
+					continue;
+				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
+					DRM_DEBUG("IP %s disabled for hw_init.\n",
+						adev->ip_blocks[i].version->funcs->name);
+					adev->ip_blocks[i].status.hw = true;
+				}
+			}
+		} else {
+			r = amdgpu_asic_reset(adev);
+			if (r) {
+				dev_err(adev->dev, "asic reset on init failed\n");
+				goto failed;
+			}
 		}
 	}
 
@@ -3474,11 +3525,11 @@ fence_driver_init:
 			adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
 			adev->virt.ops = NULL;
 			r = -EAGAIN;
-			goto failed;
+			goto release_ras_con;
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
-		goto failed;
+		goto release_ras_con;
 	}
 
 	dev_info(adev->dev,
@@ -3539,19 +3590,19 @@ fence_driver_init:
 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
 	 * explicit gating rather than handling it automatically.
 	 */
-	r = amdgpu_device_ip_late_init(adev);
-	if (r) {
-		dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
-		goto failed;
+	if (!adev->gmc.xgmi.pending_reset) {
+		r = amdgpu_device_ip_late_init(adev);
+		if (r) {
+			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
+			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
+			goto release_ras_con;
+		}
+		/* must succeed. */
+		amdgpu_ras_resume(adev);
+		queue_delayed_work(system_wq, &adev->delayed_init_work,
+				   msecs_to_jiffies(AMDGPU_RESUME_MS));
 	}
-	/* must succeed. */
-	amdgpu_ras_resume(adev);
-
-	queue_delayed_work(system_wq, &adev->delayed_init_work,
-			   msecs_to_jiffies(AMDGPU_RESUME_MS));
-
 	if (amdgpu_sriov_vf(adev))
 		flush_delayed_work(&adev->delayed_init_work);
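The init-time hive-reset logic above defers the actual reset instead of performing it inline: only the IP blocks the SMU needs are brought up, late init is skipped, and (in the next hunk) mgpu_info.delayed_reset_work is queued. The deferral itself is plain workqueue usage; a self-contained sketch with illustrative names:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define DEMO_RESUME_MS 2000	/* stand-in for AMDGPU_RESUME_MS */

static void demo_reset_work_fn(struct work_struct *work)
{
	/* perform the whole-hive reset once every node has probed */
}

static DECLARE_DELAYED_WORK(demo_reset_work, demo_reset_work_fn);

static void demo_schedule_hive_reset(void)
{
	queue_delayed_work(system_wq, &demo_reset_work,
			   msecs_to_jiffies(DEMO_RESUME_MS));
}
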
@@ -3568,11 +3619,18 @@ fence_driver_init:
 	if (amdgpu_device_cache_pci_state(adev->pdev))
 		pci_restore_state(pdev);
 
+	if (adev->gmc.xgmi.pending_reset)
+		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
+				   msecs_to_jiffies(AMDGPU_RESUME_MS));
+
 	return 0;
 
+release_ras_con:
+	amdgpu_release_ras_context(adev);
+
 failed:
 	amdgpu_vf_error_trans_all(adev);
-	if (atpx)
+	if (px)
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 failed_unmap:
@@ -3594,6 +3652,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 {
 	dev_info(adev->dev, "amdgpu: finishing device.\n");
 	flush_delayed_work(&adev->delayed_init_work);
+	ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 	adev->shutdown = true;
 
 	kfree(adev->pci_state);
@@ -3622,6 +3681,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	release_firmware(adev->firmware.gpu_info_fw);
 	adev->firmware.gpu_info_fw = NULL;
 	adev->accel_working = false;
+
+	amdgpu_reset_fini(adev);
+
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
@@ -3631,18 +3693,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
-	if (amdgpu_has_atpx() &&
-	    (amdgpu_is_atpx_hybrid() ||
-	     amdgpu_has_atpx_dgpu_power_cntl()) &&
-	    !pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
 		vga_switcheroo_unregister_client(adev->pdev);
-	if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	}
 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
 		vga_client_register(adev->pdev, NULL, NULL, NULL);
-	if (adev->rio_mem)
-		pci_iounmap(adev->pdev, adev->rio_mem);
-	adev->rio_mem = NULL;
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
@@ -3673,14 +3729,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
  */
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
-	struct amdgpu_device *adev;
-	struct drm_crtc *crtc;
-	struct drm_connector *connector;
-	struct drm_connector_list_iter iter;
+	struct amdgpu_device *adev = drm_to_adev(dev);
 	int r;
 
-	adev = drm_to_adev(dev);
-
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
@@ -3692,61 +3743,19 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	cancel_delayed_work_sync(&adev->delayed_init_work);
 
-	if (!amdgpu_device_has_dc_support(adev)) {
-		/* turn off display hw */
-		drm_modeset_lock_all(dev);
-		drm_connector_list_iter_begin(dev, &iter);
-		drm_for_each_connector_iter(connector, &iter)
-			drm_helper_connector_dpms(connector,
-						  DRM_MODE_DPMS_OFF);
-		drm_connector_list_iter_end(&iter);
-		drm_modeset_unlock_all(dev);
-			/* unpin the front buffers and cursors */
-		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-			struct drm_framebuffer *fb = crtc->primary->fb;
-			struct amdgpu_bo *robj;
-
-			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
-				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-				r = amdgpu_bo_reserve(aobj, true);
-				if (r == 0) {
-					amdgpu_bo_unpin(aobj);
-					amdgpu_bo_unreserve(aobj);
-				}
-			}
-
-			if (fb == NULL || fb->obj[0] == NULL) {
-				continue;
-			}
-			robj = gem_to_amdgpu_bo(fb->obj[0]);
-			/* don't unpin kernel fb objects */
-			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-				r = amdgpu_bo_reserve(robj, true);
-				if (r == 0) {
-					amdgpu_bo_unpin(robj);
-					amdgpu_bo_unreserve(robj);
-				}
-			}
-		}
-	}
-
 	amdgpu_ras_suspend(adev);
 
 	r = amdgpu_device_ip_suspend_phase1(adev);
 
-	amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+	if (!adev->in_s0ix)
+		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
 
 	amdgpu_fence_driver_suspend(adev);
 
-	if (adev->in_poweroff_reboot_com ||
-	    !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
-		r = amdgpu_device_ip_suspend_phase2(adev);
-	else
-		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+	r = amdgpu_device_ip_suspend_phase2(adev);
 
 	/* evict remaining vram memory
 	 * This second call to evict vram is to evict the gart page table
 	 * using the CPU.
@@ -3768,16 +3777,13 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
  */
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 {
-	struct drm_connector *connector;
-	struct drm_connector_list_iter iter;
 	struct amdgpu_device *adev = drm_to_adev(dev);
-	struct drm_crtc *crtc;
 	int r = 0;
 
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
-	if (amdgpu_acpi_is_s0ix_supported(adev))
+	if (adev->in_s0ix)
 		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
 
 	/* post card */
@@ -3802,50 +3808,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 	queue_delayed_work(system_wq, &adev->delayed_init_work,
 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
-	if (!amdgpu_device_has_dc_support(adev)) {
-		/* pin cursors */
-		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
-				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-				r = amdgpu_bo_reserve(aobj, true);
-				if (r == 0) {
-					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
-					if (r != 0)
-						dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
-					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-					amdgpu_bo_unreserve(aobj);
-				}
-			}
-		}
+	if (!adev->in_s0ix) {
+		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+		if (r)
+			return r;
 	}
-	r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
-	if (r)
-		return r;
 
 	/* Make sure IB tests flushed */
 	flush_delayed_work(&adev->delayed_init_work);
 
-	/* blat the mode back in */
-	if (fbcon) {
-		if (!amdgpu_device_has_dc_support(adev)) {
-			/* pre DCE11 */
-			drm_helper_resume_force_mode(dev);
-
-			/* turn on display hw */
-			drm_modeset_lock_all(dev);
-
-			drm_connector_list_iter_begin(dev, &iter);
-			drm_for_each_connector_iter(connector, &iter)
-				drm_helper_connector_dpms(connector,
-							  DRM_MODE_DPMS_ON);
-			drm_connector_list_iter_end(&iter);
-
-			drm_modeset_unlock_all(dev);
-		}
+	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 0);
-	}
 
 	drm_kms_helper_poll_enable(dev);
 
@@ -4143,11 +4116,11 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	amdgpu_amdkfd_post_reset(adev);
 
 error:
-	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		amdgpu_inc_vram_lost(adev);
 		r = amdgpu_device_recover_vram(adev);
 	}
+	amdgpu_virt_release_full_gpu(adev, true);
 
 	return r;
 }
@@ -4224,6 +4197,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 		case CHIP_SIENNA_CICHLID:
 		case CHIP_NAVY_FLOUNDER:
 		case CHIP_DIMGREY_CAVEFISH:
+		case CHIP_VANGOGH:
+		case CHIP_ALDEBARAN:
 			break;
 		default:
 			goto disabled;
@@ -4237,15 +4212,60 @@ disabled:
 		return false;
 }
 
+int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
+{
+	u32 i;
+	int ret = 0;
+
+	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+	dev_info(adev->dev, "GPU mode1 reset\n");
+
+	/* disable BM */
+	pci_clear_master(adev->pdev);
+
+	amdgpu_device_cache_pci_state(adev->pdev);
+
+	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+		dev_info(adev->dev, "GPU smu mode1 reset\n");
+		ret = amdgpu_dpm_mode1_reset(adev);
+	} else {
+		dev_info(adev->dev, "GPU psp mode1 reset\n");
+		ret = psp_gpu_reset(adev);
+	}
+
+	if (ret)
+		dev_err(adev->dev, "GPU mode1 reset failed\n");
+
+	amdgpu_device_load_pci_state(adev->pdev);
+
+	/* wait for asic to come out of reset */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+		if (memsize != 0xffffffff)
+			break;
+		udelay(1);
+	}
+
+	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+	return ret;
+}
 
-static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
-					struct amdgpu_job *job,
-					bool *need_full_reset_arg)
+int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
+				 struct amdgpu_reset_context *reset_context)
 {
 	int i, r = 0;
-	bool need_full_reset  = *need_full_reset_arg;
+	struct amdgpu_job *job = NULL;
+	bool need_full_reset =
+		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 
-	amdgpu_debugfs_wait_dump(adev);
+	if (reset_context->reset_req_dev == adev)
+		job = reset_context->job;
+
+	/* no need to dump if device is not in good state during probe period */
+	if (!adev->gmc.xgmi.pending_reset)
+		amdgpu_debugfs_wait_dump(adev);
 
 	if (amdgpu_sriov_vf(adev)) {
 		/* stop the data exchange thread */
@@ -4266,6 +4286,13 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	if(job)
 		drm_sched_increase_karma(&job->base);
 
+	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
+	/* If reset handler not implemented, continue; otherwise return */
+	if (r == -ENOSYS)
+		r = 0;
+	else
+		return r;
+
 	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
 	if (!amdgpu_sriov_vf(adev)) {
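Both amdgpu_device_pre_asic_reset() above and amdgpu_do_asic_reset() below use the same convention for the new reset-handler layer: the handler returns -ENOSYS when no ASIC-specific implementation is registered, and the caller falls through to the legacy path; any other return value, success or failure, is final. The pattern in isolation (illustrative):

#include <linux/errno.h>

static int demo_try_handler(void)
{
	return -ENOSYS;		/* no specialised handler registered */
}

static int demo_reset(void)
{
	int r = demo_try_handler();

	if (r != -ENOSYS)	/* handler exists: use its result, good or bad */
		return r;

	/* fall back to the default reset sequence */
	return 0;
}
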
@@ -4284,30 +4311,47 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 
 		if (need_full_reset)
 			r = amdgpu_device_ip_suspend(adev);
-
-		*need_full_reset_arg = need_full_reset;
+		if (need_full_reset)
+			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+		else
+			clear_bit(AMDGPU_NEED_FULL_RESET,
+				  &reset_context->flags);
 	}
 
 	return r;
 }
 
-static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
-			       struct list_head *device_list_handle,
-			       bool *need_full_reset_arg,
-			       bool skip_hw_reset)
+int amdgpu_do_asic_reset(struct list_head *device_list_handle,
+			 struct amdgpu_reset_context *reset_context)
 {
 	struct amdgpu_device *tmp_adev = NULL;
-	bool need_full_reset = *need_full_reset_arg, vram_lost = false;
+	bool need_full_reset, skip_hw_reset, vram_lost = false;
 	int r = 0;
 
+	/* Try reset handler method first */
+	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+				    reset_list);
+	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
+	/* If reset handler not implemented, continue; otherwise return */
+	if (r == -ENOSYS)
+		r = 0;
+	else
+		return r;
+
+	/* Reset handler not implemented, use the default method */
+	need_full_reset =
+		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
+
 	/*
-	 * ASIC reset has to be done on all HGMI hive nodes ASAP
+	 * ASIC reset has to be done on all XGMI hive nodes ASAP
 	 * to allow proper links negotiation in FW (within 1 sec)
 	 */
 	if (!skip_hw_reset && need_full_reset) {
-		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 			/* For XGMI run all resets in parallel to speed up the process */
 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+				tmp_adev->gmc.xgmi.pending_reset = false;
 				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
 					r = -EALREADY;
 			} else
@@ -4322,8 +4366,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 
 		/* For XGMI wait for all resets to complete before proceed */
 		if (!r) {
-			list_for_each_entry(tmp_adev, device_list_handle,
-					    gmc.xgmi.head) {
+			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
 					flush_work(&tmp_adev->xgmi_reset_work);
 					r = tmp_adev->asic_reset_res;
@@ -4335,22 +4378,22 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	}
 
 	if (!r && amdgpu_ras_intr_triggered()) {
-		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-			if (tmp_adev->mmhub.funcs &&
-			    tmp_adev->mmhub.funcs->reset_ras_error_count)
-				tmp_adev->mmhub.funcs->reset_ras_error_count(tmp_adev);
+		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+			if (tmp_adev->mmhub.ras_funcs &&
+			    tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
+				tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
 		}
 
 		amdgpu_ras_intr_cleared();
 	}
 
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 		if (need_full_reset) {
 			/* post card */
-			if (amdgpu_device_asic_init(tmp_adev))
+			r = amdgpu_device_asic_init(tmp_adev);
+			if (r) {
 				dev_warn(tmp_adev->dev, "asic atom init failed!");
-
-			if (!r) {
+			} else {
 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
 				if (r)
@@ -4383,6 +4426,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 				 */
 				amdgpu_register_gpu_instance(tmp_adev);
 
+				if (!reset_context->hive &&
+				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+					amdgpu_xgmi_add_device(tmp_adev);
+
 				r = amdgpu_device_ip_late_init(tmp_adev);
 				if (r)
 					goto out;
@@ -4399,7 +4446,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 				 * bad_page_threshold value to fix this once
 				 * probing driver again.
 				 */
-				if (!amdgpu_ras_check_err_threshold(tmp_adev)) {
+				if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
 					/* must succeed. */
 					amdgpu_ras_resume(tmp_adev);
 				} else {
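For XGMI hives the loop above fans the per-device resets out to system_unbound_wq so every node resets within the firmware's one-second link-negotiation window, then a second pass flushes each work item and collects the results. Reduced to its workqueue skeleton (illustrative types and names):

#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/errno.h>

struct demo_node {
	struct list_head head;
	struct work_struct reset_work;
	int reset_res;
};

static int demo_reset_all(struct list_head *nodes)
{
	struct demo_node *n;
	int r = 0;

	list_for_each_entry(n, nodes, head)	/* start all resets in parallel */
		if (!queue_work(system_unbound_wq, &n->reset_work))
			r = -EALREADY;

	list_for_each_entry(n, nodes, head) {	/* then wait for every node */
		flush_work(&n->reset_work);
		if (!r)
			r = n->reset_res;
	}
	return r;
}
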
@@ -4408,8 +4455,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 				}
 
 				/* Update PSP FW topology after reset */
-				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
-					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
+				if (reset_context->hive &&
+				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
+					r = amdgpu_xgmi_update_topology(
+						reset_context->hive, tmp_adev);
 			}
 		}
 
@@ -4433,7 +4482,10 @@ out:
 	}
 
 end:
-	*need_full_reset_arg = need_full_reset;
+	if (need_full_reset)
+		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
+	else
+		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 	return r;
 }
 
@@ -4449,7 +4501,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
 		down_write(&adev->reset_sem);
 	}
 
-	atomic_inc(&adev->gpu_reset_counter);
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_MODE1:
 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4571,6 +4622,74 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 	return 0;
 }
 
+void amdgpu_device_recheck_guilty_jobs(
+	struct amdgpu_device *adev, struct list_head *device_list_handle,
+	struct amdgpu_reset_context *reset_context)
+{
+	int i, r = 0;
+
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		int ret = 0;
+		struct drm_sched_job *s_job;
+
+		if (!ring || !ring->sched.thread)
+			continue;
+
+		s_job = list_first_entry_or_null(&ring->sched.pending_list,
+				struct drm_sched_job, list);
+		if (s_job == NULL)
+			continue;
+
+		/* clear the job's guilty flag and rely on the following step to decide the real one */
+		drm_sched_reset_karma(s_job);
+		drm_sched_resubmit_jobs_ext(&ring->sched, 1);
+
+		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
+		if (ret == 0) { /* timeout */
+			DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
+						ring->sched.name, s_job->id);
+
+			/* set guilty */
+			drm_sched_increase_karma(s_job);
+retry:
+			/* do hw reset */
+			if (amdgpu_sriov_vf(adev)) {
+				amdgpu_virt_fini_data_exchange(adev);
+				r = amdgpu_device_reset_sriov(adev, false);
+				if (r)
+					adev->asic_reset_res = r;
+			} else {
+				clear_bit(AMDGPU_SKIP_HW_RESET,
+					  &reset_context->flags);
+				r = amdgpu_do_asic_reset(device_list_handle,
+							 reset_context);
+				if (r && r == -EAGAIN)
+					goto retry;
+			}
+
+			/*
+			 * add reset counter so that the following
+			 * resubmitted job could flush vmid
+			 */
+			atomic_inc(&adev->gpu_reset_counter);
+			continue;
+		}
+
+		/* got the hw fence, signal finished fence */
+		atomic_dec(ring->sched.score);
+		dma_fence_get(&s_job->s_fence->finished);
+		dma_fence_signal(&s_job->s_fence->finished);
+		dma_fence_put(&s_job->s_fence->finished);
+
+		/* remove node from list and free the job */
+		spin_lock(&ring->sched.job_list_lock);
+		list_del_init(&s_job->list);
+		spin_unlock(&ring->sched.job_list_lock);
+		ring->sched.ops->free_job(s_job);
+	}
+}
+
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -4586,13 +4705,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
 	struct list_head device_list, *device_list_handle =  NULL;
-	bool need_full_reset = false;
 	bool job_signaled = false;
 	struct amdgpu_hive_info *hive = NULL;
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
+	int tmp_vram_lost_counter;
+	struct amdgpu_reset_context reset_context;
+
+	memset(&reset_context, 0, sizeof(reset_context));
 
 	/*
 	 * Special case: RAS triggered and full reset isn't supported
@@ -4633,6 +4755,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		mutex_lock(&hive->hive_lock);
 	}
 
+	reset_context.method = AMD_RESET_METHOD_NONE;
+	reset_context.reset_req_dev = adev;
+	reset_context.job = job;
+	reset_context.hive = hive;
+	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
 	/*
 	 * lock the device before we try to operate the linked list
 	 * if didn't get the device lock, don't touch the linked list since
@@ -4656,16 +4784,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 */
 	INIT_LIST_HEAD(&device_list);
 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
-		if (!list_is_first(&adev->gmc.xgmi.head, &hive->device_list))
-			list_rotate_to_front(&adev->gmc.xgmi.head, &hive->device_list);
-		device_list_handle = &hive->device_list;
+		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+			list_add_tail(&tmp_adev->reset_list, &device_list);
+		if (!list_is_first(&adev->reset_list, &device_list))
+			list_rotate_to_front(&adev->reset_list, &device_list);
+		device_list_handle = &device_list;
 	} else {
-		list_add_tail(&adev->gmc.xgmi.head, &device_list);
+		list_add_tail(&adev->reset_list, &device_list);
 		device_list_handle = &device_list;
 	}
 
 	/* block all schedulers and reset given job's ring */
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 		/*
 		 * Try to put the audio codec into suspend state
 		 * before gpu reset started.
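amdgpu_device_recheck_guilty_jobs() above (used when amdgpu_gpu_recovery == 2) resubmits the first pending job of each ring alone and decides guilt from whether its hardware fence signals in time. The decision hinges on dma_fence_wait_timeout()'s return convention, shown here in isolation (illustrative):

#include <linux/dma-fence.h>

static bool demo_job_hung(struct dma_fence *hw_fence, long timeout)
{
	long ret = dma_fence_wait_timeout(hw_fence, false, timeout);

	/*
	 * > 0: fence signaled, remaining jiffies returned - job is innocent
	 * = 0: timed out - this job really hangs the hardware
	 * < 0: interrupted or error
	 */
	return ret == 0;
}
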
@@ -4710,6 +4840,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			if (need_emergency_restart)
 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
+		atomic_inc(&tmp_adev->gpu_reset_counter);
 	}
 
 	if (need_emergency_restart)
@@ -4729,10 +4860,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	}
 
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		r = amdgpu_device_pre_asic_reset(tmp_adev,
-						 (tmp_adev == adev) ? job : NULL,
-						 &need_full_reset);
+	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+		r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
 		/*TODO Should we stop ?*/
 		if (r) {
 			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -4741,6 +4870,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		}
 	}
 
+	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
 	/* Actual ASIC resets if needed.*/
 	/* TODO Implement XGMI hive reset logic for SRIOV */
 	if (amdgpu_sriov_vf(adev)) {
@@ -4748,7 +4878,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		if (r)
 			adev->asic_reset_res = r;
 	} else {
-		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
+		r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
 		if (r && r == -EAGAIN)
 			goto retry;
 	}
@@ -4756,7 +4886,19 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 
 skip_hw_reset:
 
 	/* Post ASIC reset for all devs .*/
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+
+		/*
+		 * Sometimes a later bad compute job can block a good gfx job as gfx
+		 * and compute ring share internal GC HW mutually. We add an additional
+		 * guilty jobs recheck step to find the real guilty job, it synchronously
+		 * submits and waits for the first job being signaled. If it gets timeout,
+		 * we identify it as a real guilty job.
+		 */
+		if (amdgpu_gpu_recovery == 2 &&
+			!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
+			amdgpu_device_recheck_guilty_jobs(
+				tmp_adev, device_list_handle, &reset_context);
 
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -4787,10 +4929,17 @@ skip_hw_reset:
 	}
 
 skip_sched_resume:
-	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		/*unlock kfd: SRIOV would do it separately */
+	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+		/* unlock kfd: SRIOV would do it separately */
 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
 	                amdgpu_amdkfd_post_reset(tmp_adev);
+
+		/* kfd_post_reset will do nothing if kfd device is not initialized,
+		 * need to bring up kfd here if it has not been initialized before
+		 */
+		if (!adev->kfd.init_complete)
+			amdgpu_amdkfd_device_init(adev);
+
 		if (audio_suspended)
 			amdgpu_device_resume_display_audio(tmp_adev);
 		amdgpu_device_unlock_adev(tmp_adev);
@@ -5052,6 +5201,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 			drm_sched_stop(&ring->sched, NULL);
 		}
+		atomic_inc(&adev->gpu_reset_counter);
 		return PCI_ERS_RESULT_NEED_RESET;
 	case pci_channel_io_perm_failure:
 		/* Permanent error, prepare for device removal */
@@ -5093,14 +5243,16 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	int r, i;
-	bool need_full_reset = true;
+	struct amdgpu_reset_context reset_context;
 	u32 memsize;
 	struct list_head device_list;
 
 	DRM_INFO("PCI error: slot reset callback!!\n");
 
+	memset(&reset_context, 0, sizeof(reset_context));
+
 	INIT_LIST_HEAD(&device_list);
-	list_add_tail(&adev->gmc.xgmi.head, &device_list);
+	list_add_tail(&adev->reset_list, &device_list);
 
 	/* wait for asic to come out of reset */
 	msleep(500);
@@ -5121,13 +5273,18 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
 		goto out;
 	}
 
+	reset_context.method = AMD_RESET_METHOD_NONE;
+	reset_context.reset_req_dev = adev;
+	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+
 	adev->in_pci_err_recovery = true;
-	r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
+	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
 	adev->in_pci_err_recovery = false;
 	if (r)
 		goto out;
 
-	r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
+	r = amdgpu_do_asic_reset(&device_list, &reset_context);
 
 out:
 	if (!r) {
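The slot-reset path above reuses the new reset-context machinery with NEED_FULL_RESET and SKIP_HW_RESET both set, since the PCI core has already reset the slot and only software re-initialization is needed. For orientation, the error-recovery callbacks a PCI driver wires up look roughly like this (hedged sketch of the generic API, not amdgpu's full handlers):

#include <linux/pci.h>

static pci_ers_result_t demo_error_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		/* stop hardware access, then ask the core for a slot reset */
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
	default:
		return PCI_ERS_RESULT_DISCONNECT;
	}
}

static pci_ers_result_t demo_slot_reset(struct pci_dev *pdev)
{
	/* re-initialize the device; amdgpu runs its ASIC reset path here */
	return PCI_ERS_RESULT_RECOVERED;
}

static const struct pci_error_handlers demo_err_handlers = {
	.error_detected	= demo_error_detected,
	.slot_reset	= demo_slot_reset,
};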
