diff options
author | YiPeng Chai <YiPeng.Chai@amd.com> | 2024-03-18 06:48:07 +0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2024-04-27 00:22:41 +0300 |
commit | f27defca68824e8e97218b8816249f258d3d5d32 (patch) | |
tree | f3f9fcea0a4d86208aba92baa75ba7b3cf64478c /drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | |
parent | b2aa6b108dd3bf081f0848f07ba74ad73ec635be (diff) | |
download | linux-f27defca68824e8e97218b8816249f258d3d5d32.tar.xz |
drm/amdgpu: umc v12_0 logs ecc errors
1. Log ECC errors in umc v12_0.
2. Reserve newly detected ECC error pages.
3. Tag bad pages so that they can
be retired later.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v12_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 41 |
1 files changed, 40 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 085dcfe16b5e..6c2b61ef5b57 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -546,8 +546,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint16_t hwid, mcatype; struct ta_ras_query_address_input addr_in; uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; - uint64_t err_addr; + uint64_t err_addr, hash_val = 0; + struct ras_ecc_err *ecc_err; int count; + int ret; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); @@ -589,6 +591,43 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, return 0; } + ret = amdgpu_umc_build_pages_hash(adev, + page_pfn, count, &hash_val); + if (ret) { + dev_err(adev->dev, "Fail to build error pages hash\n"); + return ret; + } + + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); + if (!ecc_err->err_pages.pfn) { + kfree(ecc_err); + return -ENOMEM; + } + + memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); + ecc_err->err_pages.count = count; + + ecc_err->hash_index = hash_val; + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_updated = true; + else + dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); + + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + return ret; + } + con->umc_ecc_log.de_updated = true; return 0; |