summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Gangliang Xie <ganglxie@amd.com> 2025-12-15 09:19:34 +0300
committer: Alex Deucher <alexander.deucher@amd.com> 2026-03-04 19:42:04 +0300
commit: 42c46be2ec30df732cea4d4682e8f70795f21cee (patch)
tree: b105fe990bd868a193f5ff8b5ab9310f89286582
parent: 3972f41bc107cc10fa82c54ec420b7a947ba7912 (diff)
downloadlinux-42c46be2ec30df732cea4d4682e8f70795f21cee.tar.xz
drm/amd/ras: add read func for pmfw eeprom
Add a read function for the PMFW EEPROM, and adapt address conversion for bad pages loaded from the PMFW EEPROM. v2: change label 'Out' to 'out'. Signed-off-by: Tao Zhou <tao.zhou1@amd.com>; Signed-off-by: Gangliang Xie <ganglxie@amd.com>; Reviewed-by: Tao Zhou <tao.zhou1@amd.com>; Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras.h1
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_core.c5
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c70
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h5
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc.c27
-rw-r--r--drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c2
6 files changed, 101 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras.h b/drivers/gpu/drm/amd/ras/rascore/ras.h
index 4ceb72d24e35..04f9e09884aa 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras.h
@@ -241,6 +241,7 @@ struct ras_bank_ecc {
uint64_t status;
uint64_t ipid;
uint64_t addr;
+ uint64_t ts;
};
struct ras_bank_ecc_node {
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_core.c b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
index 01ad6ebab902..572872ef367b 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_core.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_core.c
@@ -241,7 +241,10 @@ static int ras_core_eeprom_recovery(struct ras_core_context *ras_core)
int count;
int ret;
- count = ras_eeprom_get_record_count(ras_core);
+ if (ras_fw_eeprom_supported(ras_core))
+ count = ras_fw_eeprom_get_record_count(ras_core);
+ else
+ count = ras_eeprom_get_record_count(ras_core);
if (!count)
return 0;
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
index 580dd7b09d00..79494ad16ee5 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.c
@@ -259,3 +259,73 @@ int ras_fw_eeprom_append(struct ras_core_context *ras_core,
mutex_unlock(&control->ras_tbl_mutex);
return 0;
}
+
+/*
+ * Read @num consecutive bad page records, starting at record index @rec_idx,
+ * through the ras_fw_get_* helpers while holding control->ras_tbl_mutex.
+ *
+ * @ras_core:   RAS core context; its ras_umc.ip_func->mca_ipid_parse callback
+ *              is required.
+ * @record_umc: optional output array, indexed 0..@num-1; may be NULL.
+ * @ras_ecc:    optional output array, indexed 0..@num-1; may be NULL.
+ * @rec_idx:    index of the first record to fetch.
+ * @num:        number of records to fetch.
+ *
+ * For each record the MCA address, IPID and timestamp are fetched. When
+ * @record_umc is given, the entry is filled in, the IPID is decoded by
+ * mca_ipid_parse, and the bad channel bitmap in the control block is updated.
+ * When @ras_ecc is given, the raw address/IPID/timestamp are stored there.
+ *
+ * Returns 0 on success (including @num == 0), -EOPNOTSUPP when the
+ * mca_ipid_parse callback is missing, or the first non-zero value returned
+ * by a fetch helper. Entries written before a failure are left in place.
+ */
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+			   struct eeprom_umc_record *record_umc,
+			   struct ras_bank_ecc *ras_ecc,
+			   u32 rec_idx, const u32 num)
+{
+	struct ras_fw_eeprom_control *control = &ras_core->ras_fw_eeprom;
+	u64 mca, ipid, ts;
+	/* Must start at 0: with @num == 0 the loop body never assigns it. */
+	int ret = 0;
+	u32 i;
+
+	if (!ras_core->ras_umc.ip_func ||
+	    !ras_core->ras_umc.ip_func->mca_ipid_parse)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&control->ras_tbl_mutex);
+
+	/* i is the offset into the output arrays, idx the record index. */
+	for (i = 0; i < num; i++) {
+		const u32 idx = rec_idx + i;
+
+		ret = ras_fw_get_badpage_mca_addr(ras_core, idx, &mca);
+		if (ret)
+			goto out;
+
+		ret = ras_fw_get_badpage_ipid(ras_core, idx, &ipid);
+		if (ret)
+			goto out;
+
+		ret = ras_fw_get_timestamp(ras_core, idx, &ts);
+		if (ret)
+			goto out;
+
+		if (record_umc) {
+			struct eeprom_umc_record *rec = &record_umc[i];
+			/*
+			 * The callback stores through uint32_t pointers. The
+			 * record fields are not visibly uint32_t here (the
+			 * original code had to cast their addresses), so
+			 * decode into properly typed temporaries instead of
+			 * letting a 32-bit store land on a possibly narrower
+			 * field. Seeding from the current field values keeps
+			 * them unchanged if the callback skips an output.
+			 */
+			uint32_t cu = rec->cu;
+			uint32_t mem_channel = rec->mem_channel;
+			uint32_t mcumc_id = rec->mcumc_id;
+
+			rec->address = mca;
+			/* retired_page (pa) is unused now */
+			rec->retired_row_pfn = 0x1ULL;
+			rec->ts = ts;
+			rec->err_type = RAS_EEPROM_ERR_NON_RECOVERABLE;
+
+			ras_core->ras_umc.ip_func->mca_ipid_parse(ras_core, ipid,
+					&cu, &mem_channel, &mcumc_id, NULL);
+			rec->cu = cu;
+			rec->mem_channel = mem_channel;
+			rec->mcumc_id = mcumc_id;
+
+			/*
+			 * Update bad channel bitmap. The shift uses an
+			 * unsigned 64-bit one so that any channel number
+			 * accepted by the BITS_PER_TYPE() guard is a valid
+			 * shift count (a plain int 1 is undefined at bit 31).
+			 */
+			if (rec->mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap) &&
+			    !(control->bad_channel_bitmap & (1ULL << rec->mem_channel))) {
+				control->bad_channel_bitmap |= 1ULL << rec->mem_channel;
+				control->update_channel_flag = true;
+			}
+		}
+
+		if (ras_ecc) {
+			ras_ecc[i].addr = mca;
+			ras_ecc[i].ipid = ipid;
+			ras_ecc[i].ts = ts;
+		}
+	}
+
+out:
+	mutex_unlock(&control->ras_tbl_mutex);
+	return ret;
+}
+
+/*
+ * Report the ras_num_recs value held in the firmware EEPROM control block
+ * of @ras_core; a NULL @ras_core yields 0.
+ */
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core)
+{
+	return ras_core ? ras_core->ras_fw_eeprom.ras_num_recs : 0;
+}
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
index b94d3c9703e3..353977a2371e 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_eeprom_fw.h
@@ -70,5 +70,10 @@ int ras_fw_eeprom_reset_table(struct ras_core_context *ras_core);
bool ras_fw_eeprom_check_safety_watermark(struct ras_core_context *ras_core);
int ras_fw_eeprom_append(struct ras_core_context *ras_core,
struct eeprom_umc_record *record, const u32 num);
+int ras_fw_eeprom_read_idx(struct ras_core_context *ras_core,
+ struct eeprom_umc_record *record_umc,
+ struct ras_bank_ecc *ras_ecc,
+ u32 rec_idx, const u32 num);
+uint32_t ras_fw_eeprom_get_record_count(struct ras_core_context *ras_core);
#endif
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
index f7c2cb0a8a0c..23118f41eb96 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc.c
@@ -448,17 +448,27 @@ int ras_umc_load_bad_pages(struct ras_core_context *ras_core)
uint32_t ras_num_recs;
int ret;
- ras_num_recs = ras_eeprom_get_record_count(ras_core);
- /* no bad page record, skip eeprom access */
- if (!ras_num_recs ||
- ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
- return 0;
+ if (ras_fw_eeprom_supported(ras_core)) {
+ ras_num_recs = ras_fw_eeprom_get_record_count(ras_core);
+ /* no bad page record, skip eeprom access */
+ if (!ras_num_recs ||
+ ras_core->ras_fw_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
+ return 0;
+ } else {
+ ras_num_recs = ras_eeprom_get_record_count(ras_core);
+ if (!ras_num_recs ||
+ ras_core->ras_eeprom.record_threshold_config == DISABLE_RETIRE_PAGE)
+ return 0;
+ }
bps = kzalloc_objs(*bps, ras_num_recs);
if (!bps)
return -ENOMEM;
- ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
+ if (ras_fw_eeprom_supported(ras_core))
+ ret = ras_fw_eeprom_read_idx(ras_core, bps, 0, 0, ras_num_recs);
+ else
+ ret = ras_eeprom_read(ras_core, bps, ras_num_recs);
if (ret) {
RAS_DEV_ERR(ras_core->dev, "Failed to load EEPROM table records!");
} else {
@@ -486,7 +496,10 @@ static int ras_umc_save_bad_pages(struct ras_core_context *ras_core)
if (!data->bps)
return 0;
- eeprom_record_num = ras_eeprom_get_record_count(ras_core);
+ if (ras_fw_eeprom_supported(ras_core))
+ eeprom_record_num = ras_fw_eeprom_get_record_count(ras_core);
+ else
+ eeprom_record_num = ras_eeprom_get_record_count(ras_core);
mutex_lock(&ras_umc->umc_lock);
save_count = data->count - eeprom_record_num;
/* only new entries are saved */
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
index e2792b239bea..53dc59e4de0c 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_umc_v12_0.c
@@ -413,7 +413,7 @@ static int umc_v12_0_eeprom_record_to_nps_record(struct ras_core_context *ras_co
uint64_t pa = 0;
int ret = 0;
- if (nps == EEPROM_RECORD_UMC_NPS_MODE(record)) {
+ if (nps == EEPROM_RECORD_UMC_NPS_MODE(record) && !ras_fw_eeprom_supported(ras_core)) {
record->cur_nps_retired_row_pfn = EEPROM_RECORD_UMC_ADDR_PFN(record);
} else {
ret = convert_eeprom_record_to_nps_addr(ras_core,