diff options
author | Dave Airlie <airlied@redhat.com> | 2018-12-13 02:49:04 +0300 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-12-13 03:06:34 +0300 |
commit | e7df065a697783ecb5c6eaa5692d78dcfceb71dd (patch) | |
tree | 2e42f7ec8ac73c13ffd6575e016dad1beb4de83b /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |
parent | e69aa5f9b97f7f871643336deb281db5cb14878b (diff) | |
parent | 674e78acae0dfb4beb56132e41cbae5b60f7d662 (diff) | |
download | linux-e7df065a697783ecb5c6eaa5692d78dcfceb71dd.tar.xz |
Merge branch 'drm-next-4.21' of git://people.freedesktop.org/~agd5f/linux into drm-next
[airlied: make etnaviv build again]
amdgpu:
- DC trace support
- More DC documentation
- XGMI hive reset support
- Rework IH interaction with KFD
- Misc fixes and cleanups
- Powerplay updates for newer polaris variants
- Add cursor plane update fast path
- Enable gpu reset by default on CI parts
- Fix config with KFD/HSA not enabled
amdkfd:
- Limit vram overcommit
- dmabuf support
- Support for doorbell BOs
ttm:
- Support for simultaneous submissions to multiple engines
scheduler:
- Add helpers for hw with preemption support
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181207233119.16861-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c75badfa5c4c..b60afeade50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -515,7 +515,6 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { - amdgpu_asic_init_doorbell_index(adev); /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { @@ -529,6 +528,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) return -EINVAL; + amdgpu_asic_init_doorbell_index(adev); + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); @@ -1864,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_device_fini(adev); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); @@ -2353,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } + +static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) +{ + struct amdgpu_device *adev = + container_of(__work, struct amdgpu_device, xgmi_reset_work); + + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) + DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + adev->asic_reset_res, adev->ddev->unique); +} + + /** * amdgpu_device_init - initialize the driver * @@ -2451,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; @@ -3239,6 +3258,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) if (amdgpu_gpu_recovery == -1) { switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: @@ -3328,10 +3349,31 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, */ if (need_full_reset) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - r = amdgpu_asic_reset(tmp_adev); - if (r) - DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", r, tmp_adev->ddev->unique); + break; + } + } + + /* For XGMI wait for all PSP resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } } } @@ -3518,8 +3560,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; - dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique); - amdgpu_device_lock_adev(tmp_adev); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, |