diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 224 | 
1 files changed, 191 insertions, 33 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b26e2fd1c538..922938931e1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,6 +36,7 @@  #include <linux/vga_switcheroo.h>  #include <drm/drm_probe_helper.h>  #include <linux/mmu_notifier.h> +#include <linux/suspend.h>  #include "amdgpu.h"  #include "amdgpu_irq.h" @@ -45,6 +46,8 @@  #include "amdgpu_amdkfd.h"  #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include "amdgpu_reset.h"  /*   * KMS wrapper. @@ -90,9 +93,10 @@   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync   * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ + * - 3.41.0 - Add video codec query   */  #define KMS_DRIVER_MAJOR	3 -#define KMS_DRIVER_MINOR	40 +#define KMS_DRIVER_MINOR	41  #define KMS_DRIVER_PATCHLEVEL	0  int amdgpu_vram_limit; @@ -145,6 +149,7 @@ int amdgpu_compute_multipipe = -1;  int amdgpu_gpu_recovery = -1; /* auto */  int amdgpu_emu_mode;  uint amdgpu_smu_memory_pool_size; +int amdgpu_smu_pptable_id = -1;  /*   * FBC (bit 0) disabled by default   * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default @@ -162,16 +167,26 @@ int amdgpu_discovery = -1;  int amdgpu_mes;  int amdgpu_noretry = -1;  int amdgpu_force_asic_type = -1; -int amdgpu_tmz; +int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode;  int amdgpu_reset_method = -1; /* auto */  int amdgpu_num_kcq = -1; +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); +  struct amdgpu_mgpu_info mgpu_info = {  	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), +	.delayed_reset_work = __DELAYED_WORK_INITIALIZER( +			mgpu_info.delayed_reset_work, +			amdgpu_drv_delayed_reset_work_handler, 0),  };  int amdgpu_ras_enable = -1;  uint amdgpu_ras_mask = 0xffffffff;  int amdgpu_bad_page_threshold = -1; +struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { +	.timeout_fatal_disable = false, +	.period = 0x0, /* 
default to 0x0 (timeout disable) */ +};  /**   * DOC: vramlimit (int) @@ -502,7 +517,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);   * DOC: gpu_recovery (int)   * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).   */ -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");  module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);  /** @@ -528,6 +543,20 @@ MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff),  module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);  /** + * DOC: timeout_fatal_disable (bool) + * Disable Watchdog timeout fatal error event + */ +MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)"); +module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644); + +/** + * DOC: timeout_period (uint) + * Modify the watchdog timeout max_cycles as (1 << period) + */ +MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period)"); +module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); + +/**   * DOC: si_support (int)   * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,   * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, @@ -748,6 +777,13 @@ bool no_system_mem_limit;  module_param(no_system_mem_limit, bool, 0644);  MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); +/** + * DOC: no_queue_eviction_on_vm_fault (int) + * If set, process queues will not be evicted on gpuvm fault. 
This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). + */ +int amdgpu_no_queue_eviction_on_vm_fault = 0; +MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); +module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);  #endif  /** @@ -792,10 +828,21 @@ module_param_named(backlight, amdgpu_backlight, bint, 0444);   *   * The default value: 0 (off).  TODO: change to auto till it is completed.   */ -MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); +MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)");  module_param_named(tmz, amdgpu_tmz, int, 0444);  /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless mode change experience + * when setting a freesync supported mode for which full modeset is not needed. + * The default value: 0 (off). + */ +MODULE_PARM_DESC( +	freesync_video, +	"Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/**   * DOC: reset_method (int)   * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci)   */ @@ -815,6 +862,15 @@ module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);  MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");  module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); +/** + * DOC: smu_pptable_id (int) + * Used to override pptable id. id = 0 use VBIOS pptable. + * id > 0 use the soft pptable with specified id. 
+ */ +MODULE_PARM_DESC(smu_pptable_id, +	"specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); +module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); +  static const struct pci_device_id pciidlist[] = {  #ifdef  CONFIG_DRM_AMDGPU_SI  	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1107,6 +1163,7 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	{0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, +	{0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	/* Van Gogh */ @@ -1124,6 +1181,11 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},  	{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, +	/* Aldebaran */ +	{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +	{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, +  	{0, 0, 0}  }; @@ -1274,24 +1336,127 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)  	 */  	if (!amdgpu_passthrough(adev))  		adev->mp1_state = PP_MP1_STATE_UNLOAD; -	adev->in_poweroff_reboot_com = true;  	amdgpu_device_ip_suspend(adev); -	adev->in_poweroff_reboot_com = false;  	adev->mp1_state = PP_MP1_STATE_NONE;  } +/** + * amdgpu_drv_delayed_reset_work_handler - work handler for reset + * + * @work: work_struct. 
+ */ +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) +{ +	struct list_head device_list; +	struct amdgpu_device *adev; +	int i, r; +	struct amdgpu_reset_context reset_context; + +	memset(&reset_context, 0, sizeof(reset_context)); + +	mutex_lock(&mgpu_info.mutex); +	if (mgpu_info.pending_reset == true) { +		mutex_unlock(&mgpu_info.mutex); +		return; +	} +	mgpu_info.pending_reset = true; +	mutex_unlock(&mgpu_info.mutex); + +	/* Use a common context, just need to make sure full reset is done */ +	reset_context.method = AMD_RESET_METHOD_NONE; +	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + +	for (i = 0; i < mgpu_info.num_dgpu; i++) { +		adev = mgpu_info.gpu_ins[i].adev; +		reset_context.reset_req_dev = adev; +		r = amdgpu_device_pre_asic_reset(adev, &reset_context); +		if (r) { +			dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", +				r, adev_to_drm(adev)->unique); +		} +		if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work)) +			r = -EALREADY; +	} +	for (i = 0; i < mgpu_info.num_dgpu; i++) { +		adev = mgpu_info.gpu_ins[i].adev; +		flush_work(&adev->xgmi_reset_work); +		adev->gmc.xgmi.pending_reset = false; +	} + +	/* reset function will rebuild the xgmi hive info , clear it now */ +	for (i = 0; i < mgpu_info.num_dgpu; i++) +		amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev); + +	INIT_LIST_HEAD(&device_list); + +	for (i = 0; i < mgpu_info.num_dgpu; i++) +		list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list); + +	/* unregister the GPU first, reset function will add them back */ +	list_for_each_entry(adev, &device_list, reset_list) +		amdgpu_unregister_gpu_instance(adev); + +	/* Use a common context, just need to make sure full reset is done */ +	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); +	r = amdgpu_do_asic_reset(&device_list, &reset_context); + +	if (r) { +		DRM_ERROR("reinit gpus failure"); +		return; +	} +	for (i = 0; i < mgpu_info.num_dgpu; i++) { +		adev = 
mgpu_info.gpu_ins[i].adev; +		if (!adev->kfd.init_complete) +			amdgpu_amdkfd_device_init(adev); +		amdgpu_ttm_set_buffer_funcs_status(adev, true); +	} +	return; +} + +static int amdgpu_pmops_prepare(struct device *dev) +{ +	struct drm_device *drm_dev = dev_get_drvdata(dev); + +	/* Return a positive number here so +	 * DPM_FLAG_SMART_SUSPEND works properly +	 */ +	if (amdgpu_device_supports_boco(drm_dev)) +		return pm_runtime_suspended(dev) && +			pm_suspend_via_firmware(); + +	return 0; +} + +static void amdgpu_pmops_complete(struct device *dev) +{ +	/* nothing to do */ +} +  static int amdgpu_pmops_suspend(struct device *dev)  {  	struct drm_device *drm_dev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = drm_to_adev(drm_dev); +	int r; -	return amdgpu_device_suspend(drm_dev, true); +	if (amdgpu_acpi_is_s0ix_supported(adev)) +		adev->in_s0ix = true; +	adev->in_s3 = true; +	r = amdgpu_device_suspend(drm_dev, true); +	adev->in_s3 = false; + +	return r;  }  static int amdgpu_pmops_resume(struct device *dev)  {  	struct drm_device *drm_dev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = drm_to_adev(drm_dev); +	int r; -	return amdgpu_device_resume(drm_dev, true); +	r = amdgpu_device_resume(drm_dev, true); +	if (amdgpu_acpi_is_s0ix_supported(adev)) +		adev->in_s0ix = false; +	return r;  }  static int amdgpu_pmops_freeze(struct device *dev) @@ -1300,9 +1465,9 @@ static int amdgpu_pmops_freeze(struct device *dev)  	struct amdgpu_device *adev = drm_to_adev(drm_dev);  	int r; -	adev->in_hibernate = true; +	adev->in_s4 = true;  	r = amdgpu_device_suspend(drm_dev, true); -	adev->in_hibernate = false; +	adev->in_s4 = false;  	if (r)  		return r;  	return amdgpu_asic_reset(adev); @@ -1318,13 +1483,8 @@ static int amdgpu_pmops_thaw(struct device *dev)  static int amdgpu_pmops_poweroff(struct device *dev)  {  	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_to_adev(drm_dev); -	int r; -	adev->in_poweroff_reboot_com = true; -	r =  
amdgpu_device_suspend(drm_dev, true); -	adev->in_poweroff_reboot_com = false; -	return r; +	return amdgpu_device_suspend(drm_dev, true);  }  static int amdgpu_pmops_restore(struct device *dev) @@ -1357,7 +1517,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  	}  	adev->in_runpm = true; -	if (amdgpu_device_supports_atpx(drm_dev)) +	if (amdgpu_device_supports_px(drm_dev))  		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;  	ret = amdgpu_device_suspend(drm_dev, false); @@ -1366,16 +1526,14 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  		return ret;  	} -	if (amdgpu_device_supports_atpx(drm_dev)) { +	if (amdgpu_device_supports_px(drm_dev)) {  		/* Only need to handle PCI state in the driver for ATPX  		 * PCI core handles it for _PR3.  		 */ -		if (!amdgpu_is_atpx_hybrid()) { -			amdgpu_device_cache_pci_state(pdev); -			pci_disable_device(pdev); -			pci_ignore_hotplug(pdev); -			pci_set_power_state(pdev, PCI_D3cold); -		} +		amdgpu_device_cache_pci_state(pdev); +		pci_disable_device(pdev); +		pci_ignore_hotplug(pdev); +		pci_set_power_state(pdev, PCI_D3cold);  		drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;  	} else if (amdgpu_device_supports_baco(drm_dev)) {  		amdgpu_device_baco_enter(drm_dev); @@ -1394,19 +1552,17 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  	if (!adev->runpm)  		return -EINVAL; -	if (amdgpu_device_supports_atpx(drm_dev)) { +	if (amdgpu_device_supports_px(drm_dev)) {  		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;  		/* Only need to handle PCI state in the driver for ATPX  		 * PCI core handles it for _PR3.  		 
*/ -		if (!amdgpu_is_atpx_hybrid()) { -			pci_set_power_state(pdev, PCI_D0); -			amdgpu_device_load_pci_state(pdev); -			ret = pci_enable_device(pdev); -			if (ret) -				return ret; -		} +		pci_set_power_state(pdev, PCI_D0); +		amdgpu_device_load_pci_state(pdev); +		ret = pci_enable_device(pdev); +		if (ret) +			return ret;  		pci_set_master(pdev);  	} else if (amdgpu_device_supports_boco(drm_dev)) {  		/* Only need to handle PCI state in the driver for ATPX @@ -1417,7 +1573,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  		amdgpu_device_baco_exit(drm_dev);  	}  	ret = amdgpu_device_resume(drm_dev, false); -	if (amdgpu_device_supports_atpx(drm_dev)) +	if (amdgpu_device_supports_px(drm_dev))  		drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;  	adev->in_runpm = false;  	return 0; @@ -1498,6 +1654,8 @@ out:  }  static const struct dev_pm_ops amdgpu_pm_ops = { +	.prepare = amdgpu_pmops_prepare, +	.complete = amdgpu_pmops_complete,  	.suspend = amdgpu_pmops_suspend,  	.resume = amdgpu_pmops_resume,  	.freeze = amdgpu_pmops_freeze, | 
