diff options
author | Tao Zhou <tao.zhou1@amd.com> | 2021-09-23 09:11:22 +0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2021-10-04 22:22:57 +0300 |
commit | c7490949239646c61db869014fcc74ed2cb91d53 (patch) | |
tree | c6a82a5cfd48c00b509af6f3fad17f25594aa9c6 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |
parent | e5d59cfa330523e47cba62a496864acc3948fc27 (diff) | |
download | linux-c7490949239646c61db869014fcc74ed2cb91d53.tar.xz |
amd/amdkfd: add ras page retirement handling for sq/sdma (v3)
In ras poison mode, page retirement will be handled by the irq handler of the
module which consumes corrupted data.
v2: rename ras_process_cb to ras_poison_consumption_handler.
move the handler's implementation from ASIC specific file to common
file.
v3: call gpu reset for xGMI connected mode.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1d41c2c00623..7505f1b9d3f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -31,6 +31,8 @@ #include <linux/dma-buf.h> #include "amdgpu_xgmi.h" #include <uapi/linux/kfd_ioctl.h> +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -780,3 +782,15 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) return adev->have_atomics_support; } + +void amdgpu_amdkfd_ras_poison_consumption_handler(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct ras_err_data err_data = {0, 0, 0, NULL}; + + /* CPU MCA will handle page retirement if connected_to_cpu is 1 */ + if (!adev->gmc.xgmi.connected_to_cpu) + amdgpu_umc_process_ras_data_cb(adev, &err_data, NULL); + else + amdgpu_amdkfd_gpu_reset(kgd); +} |