diff options
author | Qiuxu Zhuo <qiuxu.zhuo@intel.com> | 2024-10-15 10:22:36 +0300 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2024-10-23 21:59:21 +0300 |
commit | a36667037a0c0e36c59407f8ae636295390239a5 (patch) | |
tree | 6bc66b566a3fd90ba9ab04db2e74c0525a5c36e8 | |
parent | 2397f795735219caa9c2fe61e7bcdd0652e670d3 (diff) | |
download | linux-a36667037a0c0e36c59407f8ae636295390239a5.tar.xz |
EDAC/{skx_common,i10nm}: Fix incorrect far-memory error source indicator
Granite Rapids CPUs with Flat2LM memory configurations may
mistakenly report near-memory errors as far-memory errors, resulting
in invalid decoded ADXL results:
EDAC skx: Bad imc -1
Fix this incorrect far-memory error source indicator by prefetching the
decoded far-memory controller ID and adjusting the error source indicator
to near-memory if the far-memory controller ID is invalid.
Fixes: ba987eaaabf9 ("EDAC/i10nm: Add Intel Granite Rapids server support")
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Tested-by: Diego Garcia Rodriguez <diego.garcia.rodriguez@intel.com>
Link: https://lore.kernel.org/r/20241015072236.24543-3-qiuxu.zhuo@intel.com
-rw-r--r-- | drivers/edac/i10nm_base.c | 1 |
-rw-r--r-- | drivers/edac/skx_common.c | 23 |
-rw-r--r-- | drivers/edac/skx_common.h | 1 |
3 files changed, 25 insertions, 0 deletions
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index e2a954de913b..51556c72a967 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1036,6 +1036,7 @@ static int __init i10nm_init(void) return -ENODEV; cfg = (struct res_config *)id->driver_data; + skx_set_res_cfg(cfg); res_cfg = cfg; rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 52b462899870..6cf17af7d911 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -47,6 +47,7 @@ static skx_show_retry_log_f skx_show_retry_rd_err_log; static u64 skx_tolm, skx_tohm; static LIST_HEAD(dev_edac_list); static bool skx_mem_cfg_2lm; +static struct res_config *skx_res_cfg; int skx_adxl_get(void) { @@ -135,6 +136,22 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src) return false; } + /* + * GNR with a Flat2LM memory configuration may mistakenly classify + * a near-memory error(DDR5) as a far-memory error(CXL), resulting + * in the incorrect selection of decoded ADXL components. + * To address this, prefetch the decoded far-memory controller ID + * and adjust the error source to near-memory if the far-memory + * controller ID is invalid. + */ + if (skx_res_cfg && skx_res_cfg->type == GNR && err_src == ERR_SRC_2LM_FM) { + res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]]; + if (res->imc == -1) { + err_src = ERR_SRC_2LM_NM; + edac_dbg(0, "Adjust the error source to near-memory.\n"); + } + } + res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]]; if (err_src == ERR_SRC_2LM_NM) { res->imc = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ? 
@@ -191,6 +208,12 @@ void skx_set_mem_cfg(bool mem_cfg_2lm) } EXPORT_SYMBOL_GPL(skx_set_mem_cfg); +void skx_set_res_cfg(struct res_config *cfg) +{ + skx_res_cfg = cfg; +} +EXPORT_SYMBOL_GPL(skx_set_res_cfg); + void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) { driver_decode = decode; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index cd47f8186831..54bba8a62f72 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -241,6 +241,7 @@ int skx_adxl_get(void); void skx_adxl_put(void); void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); void skx_set_mem_cfg(bool mem_cfg_2lm); +void skx_set_res_cfg(struct res_config *cfg); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); int skx_get_node_id(struct skx_dev *d, u8 *id); |