summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
diff options
context:
space:
mode:
author: YiPeng Chai <YiPeng.Chai@amd.com> 2024-03-18 06:48:07 +0300
committer: Alex Deucher <alexander.deucher@amd.com> 2024-04-27 00:22:41 +0300
commit: f27defca68824e8e97218b8816249f258d3d5d32 (patch)
tree: f3f9fcea0a4d86208aba92baa75ba7b3cf64478c /drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
parent: b2aa6b108dd3bf081f0848f07ba74ad73ec635be (diff)
download: linux-f27defca68824e8e97218b8816249f258d3d5d32.tar.xz
drm/amdgpu: umc v12_0 logs ecc errors
1. UMC v12_0 logs ECC errors.
2. Reserve newly detected ECC error pages.
3. Add a tag for bad pages so that they can be retired later.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/umc_v12_0.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c 41
1 files changed, 40 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 085dcfe16b5e..6c2b61ef5b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -546,8 +546,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
uint16_t hwid, mcatype;
struct ta_ras_query_address_input addr_in;
uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
- uint64_t err_addr;
+ uint64_t err_addr, hash_val = 0;
+ struct ras_ecc_err *ecc_err;
int count;
+ int ret;
hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
@@ -589,6 +591,43 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
return 0;
}
+ ret = amdgpu_umc_build_pages_hash(adev,
+ page_pfn, count, &hash_val);
+ if (ret) {
+ dev_err(adev->dev, "Fail to build error pages hash\n");
+ return ret;
+ }
+
+ ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL);
+ if (!ecc_err)
+ return -ENOMEM;
+
+ ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL);
+ if (!ecc_err->err_pages.pfn) {
+ kfree(ecc_err);
+ return -ENOMEM;
+ }
+
+ memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn));
+ ecc_err->err_pages.count = count;
+
+ ecc_err->hash_index = hash_val;
+ ecc_err->status = status;
+ ecc_err->ipid = ipid;
+ ecc_err->addr = addr;
+
+ ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
+ if (ret) {
+ if (ret == -EEXIST)
+ con->umc_ecc_log.de_updated = true;
+ else
+ dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret);
+
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ return ret;
+ }
+
con->umc_ecc_log.de_updated = true;
return 0;