diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b24f78b74b41..668c20902358 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -45,6 +45,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" /* * KMS wrapper. @@ -168,8 +169,13 @@ int amdgpu_tmz = -1; /* auto */ int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); + struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), + .delayed_reset_work = __DELAYED_WORK_INITIALIZER( + mgpu_info.delayed_reset_work, + amdgpu_drv_delayed_reset_work_handler, 0), }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; @@ -1321,6 +1327,69 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) adev->mp1_state = PP_MP1_STATE_NONE; } +/** + * amdgpu_drv_delayed_reset_work_handler - work handler for reset + * + * @work: work_struct. + */ +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) +{ + struct list_head device_list; + struct amdgpu_device *adev; + int i, r; + bool need_full_reset = true; + + mutex_lock(&mgpu_info.mutex); + if (mgpu_info.pending_reset == true) { + mutex_unlock(&mgpu_info.mutex); + return; + } + mgpu_info.pending_reset = true; + mutex_unlock(&mgpu_info.mutex); + + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset); + if (r) { + dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", + r, adev_to_drm(adev)->unique); + } + if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work)) + r = -EALREADY; + } + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + adev->gmc.xgmi.pending_reset = false; + flush_work(&adev->xgmi_reset_work); + } + + /* reset function will rebuild the xgmi hive info , clear it now */ + for (i = 0; i < mgpu_info.num_dgpu; i++) + amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev); + + INIT_LIST_HEAD(&device_list); + + for (i = 0; i < mgpu_info.num_dgpu; i++) + list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list); + + /* unregister the GPU first, reset function will add them back */ + list_for_each_entry(adev, &device_list, reset_list) + amdgpu_unregister_gpu_instance(adev); + + r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true); + if (r) { + DRM_ERROR("reinit gpus failure"); + return; + } + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + if (!adev->kfd.init_complete) + amdgpu_amdkfd_device_init(adev); + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + return; +} + static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); |