summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig47
-rw-r--r--drivers/edac/Makefile6
-rw-r--r--drivers/edac/altera_edac.c15
-rw-r--r--drivers/edac/altera_edac.h2
-rw-r--r--drivers/edac/amd64_edac.c111
-rw-r--r--drivers/edac/bluefield_edac.c20
-rw-r--r--drivers/edac/cell_edac.c281
-rw-r--r--drivers/edac/debugfs.c5
-rwxr-xr-xdrivers/edac/ecs.c207
-rw-r--r--drivers/edac/edac_device.c185
-rw-r--r--drivers/edac/edac_mc.c2
-rw-r--r--drivers/edac/edac_mc_sysfs.c6
-rw-r--r--drivers/edac/i10nm_base.c70
-rw-r--r--drivers/edac/i5000_edac.c8
-rw-r--r--drivers/edac/i5400_edac.c3
-rw-r--r--drivers/edac/i7300_edac.c7
-rw-r--r--drivers/edac/ie31200_edac.c641
-rw-r--r--drivers/edac/igen6_edac.c105
-rw-r--r--drivers/edac/loongson_edac.c157
-rwxr-xr-xdrivers/edac/mem_repair.c360
-rw-r--r--drivers/edac/pnd2_edac.c4
-rwxr-xr-xdrivers/edac/scrub.c210
-rw-r--r--drivers/edac/skx_base.c11
-rw-r--r--drivers/edac/skx_common.c81
-rw-r--r--drivers/edac/skx_common.h25
-rw-r--r--drivers/edac/xgene_edac.c17
26 files changed, 1872 insertions, 714 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 06f7b43a6f78..19ad3c3b675d 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -75,9 +75,38 @@ config EDAC_GHES
In doubt, say 'Y'.
+config EDAC_SCRUB
+ bool "EDAC scrub feature"
+ help
+ The EDAC scrub feature is optional and is designed to control the
+ memory scrubbers in the system. The common sysfs scrub interface
+ abstracts the control of various arbitrary scrubbing functionalities
+ into a unified set of functions.
+ Say 'y/n' to enable/disable EDAC scrub feature.
+
+config EDAC_ECS
+ bool "EDAC ECS (Error Check Scrub) feature"
+ help
+ The EDAC ECS feature is optional and is designed to control on-die
+ error check scrub (e.g., DDR5 ECS) in the system. The common sysfs
+ ECS interface abstracts the control of various ECS functionalities
+ into a unified set of functions.
+ Say 'y/n' to enable/disable EDAC ECS feature.
+
+config EDAC_MEM_REPAIR
+ bool "EDAC memory repair feature"
+ help
+ The EDAC memory repair feature is optional and is designed to control
+ the memory devices with repair features, such as Post Package Repair
+ (PPR), memory sparing etc. The common sysfs memory repair interface
+ abstracts the control of various memory repair functionalities into
+ a unified set of functions.
+ Say 'y/n' to enable/disable EDAC memory repair feature.
+
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64)"
depends on AMD_NB && EDAC_DECODE_MCE
+ depends on AMD_NODE
imply AMD_ATL
help
Support for error detection and correction of DRAM ECC errors on
@@ -167,7 +196,7 @@ config EDAC_I3200
config EDAC_IE31200
tristate "Intel e312xx"
- depends on PCI && X86
+ depends on PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction on the Intel
E3-1200 based DRAM controllers.
@@ -303,14 +332,6 @@ config EDAC_PASEMI
Support for error detection and correction on PA Semi
PWRficient.
-config EDAC_CELL
- tristate "Cell Broadband Engine memory controller"
- depends on PPC_CELL_COMMON
- help
- Support for error detection and correction on the
- Cell Broadband Engine internal memory controller
- on platform without a hypervisor
-
config EDAC_CPC925
tristate "IBM CPC925 Memory Controller (PPC970FX)"
depends on PPC64
@@ -546,5 +567,13 @@ config EDAC_VERSAL
Support injecting both correctable and uncorrectable errors
for debugging purposes.
+config EDAC_LOONGSON
+ tristate "Loongson Memory Controller"
+ depends on LOONGARCH && ACPI
+ help
+ Support for error detection and correction on the Loongson
+ family memory controller. This driver reports single bit
+ errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
+ are compatible.
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index f9cf19d8d13d..a8f2d8f6c894 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -12,6 +12,9 @@ edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o wq.o
edac_core-$(CONFIG_EDAC_DEBUG) += debugfs.o
+edac_core-$(CONFIG_EDAC_SCRUB) += scrub.o
+edac_core-$(CONFIG_EDAC_ECS) += ecs.o
+edac_core-$(CONFIG_EDAC_MEM_REPAIR) += mem_repair.o
ifdef CONFIG_PCI
edac_core-y += edac_pci.o edac_pci_sysfs.o
@@ -62,8 +65,6 @@ obj-$(CONFIG_EDAC_SKX) += skx_edac.o skx_edac_common.o
i10nm_edac-y := i10nm_base.o
obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o
-obj-$(CONFIG_EDAC_CELL) += cell_edac.o
-
obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o
obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o
@@ -86,3 +87,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o
obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o
obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o
obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o
+obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 3e971f902363..c45c92f05373 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -99,7 +99,7 @@ static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id)
if (status & priv->ecc_stat_ce_mask) {
regmap_read(drvdata->mc_vbase, priv->ecc_saddr_offset,
&err_addr);
- if (priv->ecc_uecnt_offset)
+ if (priv->ecc_cecnt_offset)
regmap_read(drvdata->mc_vbase, priv->ecc_cecnt_offset,
&err_count);
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, err_count,
@@ -1005,9 +1005,6 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
}
}
- /* Interrupt mode set to every SBERR */
- regmap_write(ecc_mgr_map, ALTR_A10_ECC_INTMODE_OFST,
- ALTR_A10_ECC_INTMODE);
/* Enable ECC */
ecc_set_bits(ecc_ctrl_en_mask, (ecc_block_base +
ALTR_A10_ECC_CTRL_OFST));
@@ -1749,9 +1746,9 @@ altr_edac_a10_device_trig(struct file *file, const char __user *user_buf,
local_irq_save(flags);
if (trig_type == ALTR_UE_TRIGGER_CHAR)
- writel(priv->ue_set_mask, set_addr);
+ writew(priv->ue_set_mask, set_addr);
else
- writel(priv->ce_set_mask, set_addr);
+ writew(priv->ce_set_mask, set_addr);
/* Ensure the interrupt test bits are set */
wmb();
@@ -1781,7 +1778,7 @@ altr_edac_a10_device_trig2(struct file *file, const char __user *user_buf,
local_irq_save(flags);
if (trig_type == ALTR_UE_TRIGGER_CHAR) {
- writel(priv->ue_set_mask, set_addr);
+ writew(priv->ue_set_mask, set_addr);
} else {
/* Setup read/write of 4 bytes */
writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST);
@@ -2127,6 +2124,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
return PTR_ERR(edac->ecc_mgr_map);
}
+ /* Set irq mask for DDR SBE to avoid any pending irq before registration */
+ regmap_write(edac->ecc_mgr_map, A10_SYSMGR_ECC_INTMASK_SET_OFST,
+ (A10_SYSMGR_ECC_INTMASK_SDMMCB | A10_SYSMGR_ECC_INTMASK_DDR0));
+
edac->irq_chip.name = pdev->dev.of_node->name;
edac->irq_chip.irq_mask = a10_eccmgr_irq_mask;
edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask;
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 3727e72c8c2e..7248d24c4908 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -249,6 +249,8 @@ struct altr_sdram_mc_data {
#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94
#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1)
+#define A10_SYSMGR_ECC_INTMASK_SDMMCB BIT(16)
+#define A10_SYSMGR_ECC_INTMASK_DDR0 BIT(17)
#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C
#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5d356b7c4589..9a8866db1c3a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/ras.h>
+#include <linux/string_choices.h>
#include "amd64_edac.h"
#include <asm/amd_nb.h>
+#include <asm/amd_node.h>
static struct edac_pci_ctl_info *pci_ctl;
@@ -1170,22 +1172,21 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3));
}
- edac_dbg(1, "All DIMMs support ECC:%s\n",
- (dclr & BIT(19)) ? "yes" : "no");
+ edac_dbg(1, "All DIMMs support ECC: %s\n", str_yes_no(dclr & BIT(19)));
edac_dbg(1, " PAR/ERR parity: %s\n",
- (dclr & BIT(8)) ? "enabled" : "disabled");
+ str_enabled_disabled(dclr & BIT(8)));
if (pvt->fam == 0x10)
edac_dbg(1, " DCT 128bit mode width: %s\n",
(dclr & BIT(11)) ? "128b" : "64b");
edac_dbg(1, " x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n",
- (dclr & BIT(12)) ? "yes" : "no",
- (dclr & BIT(13)) ? "yes" : "no",
- (dclr & BIT(14)) ? "yes" : "no",
- (dclr & BIT(15)) ? "yes" : "no");
+ str_yes_no(dclr & BIT(12)),
+ str_yes_no(dclr & BIT(13)),
+ str_yes_no(dclr & BIT(14)),
+ str_yes_no(dclr & BIT(15)));
}
#define CS_EVEN_PRIMARY BIT(0)
@@ -1208,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1229,12 +1232,13 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask;
+
+ if (!mask)
+ return 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1247,19 +1251,30 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask_sec);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
/* Return size in MBs. */
return size >> 10;
@@ -1268,8 +1283,8 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
+ u32 addr_mask = 0, addr_mask_sec = 0;
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1307,13 +1322,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & (CS_EVEN_PRIMARY | CS_ODD_PRIMARY))
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
+
+ if (cs_mode & (CS_EVEN_SECONDARY | CS_ODD_SECONDARY))
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -1352,14 +1367,14 @@ static void umc_dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, "UMC%d UMC cap high: 0x%x\n", i, umc->umc_cap_hi);
edac_dbg(1, "UMC%d ECC capable: %s, ChipKill ECC capable: %s\n",
- i, (umc->umc_cap_hi & BIT(30)) ? "yes" : "no",
- (umc->umc_cap_hi & BIT(31)) ? "yes" : "no");
+ i, str_yes_no(umc->umc_cap_hi & BIT(30)),
+ str_yes_no(umc->umc_cap_hi & BIT(31)));
edac_dbg(1, "UMC%d All DIMMs support ECC: %s\n",
- i, (umc->umc_cfg & BIT(12)) ? "yes" : "no");
+ i, str_yes_no(umc->umc_cfg & BIT(12)));
edac_dbg(1, "UMC%d x4 DIMMs present: %s\n",
- i, (umc->dimm_cfg & BIT(6)) ? "yes" : "no");
+ i, str_yes_no(umc->dimm_cfg & BIT(6)));
edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
- i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
+ i, str_yes_no(umc->dimm_cfg & BIT(7)));
umc_debug_display_dimm_sizes(pvt, i);
}
@@ -1370,11 +1385,11 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt)
edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap);
edac_dbg(1, " NB two channel DRAM capable: %s\n",
- (pvt->nbcap & NBCAP_DCT_DUAL) ? "yes" : "no");
+ str_yes_no(pvt->nbcap & NBCAP_DCT_DUAL));
edac_dbg(1, " ECC capable: %s, ChipKill ECC capable: %s\n",
- (pvt->nbcap & NBCAP_SECDED) ? "yes" : "no",
- (pvt->nbcap & NBCAP_CHIPKILL) ? "yes" : "no");
+ str_yes_no(pvt->nbcap & NBCAP_SECDED),
+ str_yes_no(pvt->nbcap & NBCAP_CHIPKILL));
debug_dump_dramcfg_low(pvt, pvt->dclr0, 0);
@@ -1397,7 +1412,7 @@ static void dct_dump_misc_regs(struct amd64_pvt *pvt)
if (!dct_ganging_enabled(pvt))
debug_dump_dramcfg_low(pvt, pvt->dclr1, 1);
- edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");
+ edac_dbg(1, " DramHoleValid: %s\n", str_yes_no(dhar_valid(pvt)));
amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz);
}
@@ -2026,15 +2041,15 @@ static void read_dram_ctl_register(struct amd64_pvt *pvt)
if (!dct_ganging_enabled(pvt))
edac_dbg(0, " Address range split per DCT: %s\n",
- (dct_high_range_enabled(pvt) ? "yes" : "no"));
+ str_yes_no(dct_high_range_enabled(pvt)));
edac_dbg(0, " data interleave for ECC: %s, DRAM cleared since last warm reset: %s\n",
- (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"),
- (dct_memory_cleared(pvt) ? "yes" : "no"));
+ str_enabled_disabled(dct_data_intlv_enabled(pvt)),
+ str_yes_no(dct_memory_cleared(pvt)));
edac_dbg(0, " channel interleave: %s, "
"interleave bits selector: 0x%x\n",
- (dct_interleave_enabled(pvt) ? "enabled" : "disabled"),
+ str_enabled_disabled(dct_interleave_enabled(pvt)),
dct_sel_interleave_addr(pvt));
}
@@ -3207,8 +3222,7 @@ static bool nb_mce_bank_enabled_on_node(u16 nid)
nbe = reg->l & MSR_MCGCTL_NBE;
edac_dbg(0, "core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
- cpu, reg->q,
- (nbe ? "enabled" : "disabled"));
+ cpu, reg->q, str_enabled_disabled(nbe));
if (!nbe)
goto out;
@@ -3352,12 +3366,9 @@ static bool dct_ecc_enabled(struct amd64_pvt *pvt)
edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, nid);
- edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled"));
+ edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, str_enabled_disabled(ecc_en));
- if (!ecc_en || !nb_mce_en)
- return false;
- else
- return true;
+ return ecc_en && nb_mce_en;
}
static bool umc_ecc_enabled(struct amd64_pvt *pvt)
@@ -3377,7 +3388,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt)
}
}
- edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, (ecc_en ? "enabled" : "disabled"));
+ edac_dbg(3, "Node %d: DRAM ECC %s.\n", pvt->mc_node_id, str_enabled_disabled(ecc_en));
return ecc_en;
}
@@ -3515,9 +3526,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3882,6 +3894,7 @@ static int per_family_init(struct amd64_pvt *pvt)
break;
case 0x70 ... 0x7f:
pvt->ctl_name = "F19h_M70h";
+ pvt->max_mcs = 4;
pvt->flags.zn_regs_v2 = 1;
break;
case 0x90 ... 0x9f:
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
index 4942a240c30f..ae3bb7afa103 100644
--- a/drivers/edac/bluefield_edac.c
+++ b/drivers/edac/bluefield_edac.c
@@ -199,8 +199,10 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
* error without the detailed information.
*/
err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
- if (err)
+ if (err) {
dev_err(priv->dev, "DRAM syndrom read failed.\n");
+ return;
+ }
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
@@ -213,20 +215,26 @@ static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
}
err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
- if (err)
+ if (err) {
dev_err(priv->dev, "DRAM additional info read failed.\n");
+ return;
+ }
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
- if (err)
+ if (err) {
dev_err(priv->dev, "Error addr 0 read failed.\n");
+ return;
+ }
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
- if (err)
+ if (err) {
dev_err(priv->dev, "Error addr 1 read failed.\n");
+ return;
+ }
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
@@ -250,8 +258,10 @@ static void bluefield_edac_check(struct mem_ctl_info *mci)
return;
err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
- if (err)
+ if (err) {
dev_err(priv->dev, "ECC count read failed.\n");
+ return;
+ }
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
deleted file mode 100644
index c2420e2287ff..000000000000
--- a/drivers/edac/cell_edac.c
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Cell MIC driver for ECC counting
- *
- * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
- * <benh@kernel.crashing.org>
- *
- * This file may be distributed under the terms of the
- * GNU General Public License.
- */
-#undef DEBUG
-
-#include <linux/edac.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/stop_machine.h>
-#include <linux/io.h>
-#include <linux/of_address.h>
-#include <asm/machdep.h>
-#include <asm/cell-regs.h>
-
-#include "edac_module.h"
-
-struct cell_edac_priv
-{
- struct cbe_mic_tm_regs __iomem *regs;
- int node;
- int chanmask;
-#ifdef DEBUG
- u64 prev_fir;
-#endif
-};
-
-static void cell_edac_count_ce(struct mem_ctl_info *mci, int chan, u64 ar)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = mci->csrows[0];
- unsigned long address, pfn, offset, syndrome;
-
- dev_dbg(mci->pdev, "ECC CE err on node %d, channel %d, ar = 0x%016llx\n",
- priv->node, chan, ar);
-
- /* Address decoding is likely a bit bogus, to dbl check */
- address = (ar & 0xffffffffe0000000ul) >> 29;
- if (priv->chanmask == 0x3)
- address = (address << 1) | chan;
- pfn = address >> PAGE_SHIFT;
- offset = address & ~PAGE_MASK;
- syndrome = (ar & 0x000000001fe00000ul) >> 21;
-
- /* TODO: Decoding of the error address */
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- csrow->first_page + pfn, offset, syndrome,
- 0, chan, -1, "", "");
-}
-
-static void cell_edac_count_ue(struct mem_ctl_info *mci, int chan, u64 ar)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- struct csrow_info *csrow = mci->csrows[0];
- unsigned long address, pfn, offset;
-
- dev_dbg(mci->pdev, "ECC UE err on node %d, channel %d, ar = 0x%016llx\n",
- priv->node, chan, ar);
-
- /* Address decoding is likely a bit bogus, to dbl check */
- address = (ar & 0xffffffffe0000000ul) >> 29;
- if (priv->chanmask == 0x3)
- address = (address << 1) | chan;
- pfn = address >> PAGE_SHIFT;
- offset = address & ~PAGE_MASK;
-
- /* TODO: Decoding of the error address */
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- csrow->first_page + pfn, offset, 0,
- 0, chan, -1, "", "");
-}
-
-static void cell_edac_check(struct mem_ctl_info *mci)
-{
- struct cell_edac_priv *priv = mci->pvt_info;
- u64 fir, addreg, clear = 0;
-
- fir = in_be64(&priv->regs->mic_fir);
-#ifdef DEBUG
- if (fir != priv->prev_fir) {
- dev_dbg(mci->pdev, "fir change : 0x%016lx\n", fir);
- priv->prev_fir = fir;
- }
-#endif
- if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_SINGLE_0_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
- clear |= CBE_MIC_FIR_ECC_SINGLE_0_RESET;
- cell_edac_count_ce(mci, 0, addreg);
- }
- if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_SINGLE_1_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
- clear |= CBE_MIC_FIR_ECC_SINGLE_1_RESET;
- cell_edac_count_ce(mci, 1, addreg);
- }
- if ((priv->chanmask & 0x1) && (fir & CBE_MIC_FIR_ECC_MULTI_0_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_0);
- clear |= CBE_MIC_FIR_ECC_MULTI_0_RESET;
- cell_edac_count_ue(mci, 0, addreg);
- }
- if ((priv->chanmask & 0x2) && (fir & CBE_MIC_FIR_ECC_MULTI_1_ERR)) {
- addreg = in_be64(&priv->regs->mic_df_ecc_address_1);
- clear |= CBE_MIC_FIR_ECC_MULTI_1_RESET;
- cell_edac_count_ue(mci, 1, addreg);
- }
-
- /* The procedure for clearing FIR bits is a bit ... weird */
- if (clear) {
- fir &= ~(CBE_MIC_FIR_ECC_ERR_MASK | CBE_MIC_FIR_ECC_SET_MASK);
- fir |= CBE_MIC_FIR_ECC_RESET_MASK;
- fir &= ~clear;
- out_be64(&priv->regs->mic_fir, fir);
- (void)in_be64(&priv->regs->mic_fir);
-
- mb(); /* sync up */
-#ifdef DEBUG
- fir = in_be64(&priv->regs->mic_fir);
- dev_dbg(mci->pdev, "fir clear : 0x%016lx\n", fir);
-#endif
- }
-}
-
-static void cell_edac_init_csrows(struct mem_ctl_info *mci)
-{
- struct csrow_info *csrow = mci->csrows[0];
- struct dimm_info *dimm;
- struct cell_edac_priv *priv = mci->pvt_info;
- struct device_node *np;
- int j;
- u32 nr_pages;
-
- for_each_node_by_name(np, "memory") {
- struct resource r;
-
- /* We "know" that the Cell firmware only creates one entry
- * in the "memory" nodes. If that changes, this code will
- * need to be adapted.
- */
- if (of_address_to_resource(np, 0, &r))
- continue;
- if (of_node_to_nid(np) != priv->node)
- continue;
- csrow->first_page = r.start >> PAGE_SHIFT;
- nr_pages = resource_size(&r) >> PAGE_SHIFT;
- csrow->last_page = csrow->first_page + nr_pages - 1;
-
- for (j = 0; j < csrow->nr_channels; j++) {
- dimm = csrow->channels[j]->dimm;
- dimm->mtype = MEM_XDR;
- dimm->edac_mode = EDAC_SECDED;
- dimm->nr_pages = nr_pages / csrow->nr_channels;
- }
- dev_dbg(mci->pdev,
- "Initialized on node %d, chanmask=0x%x,"
- " first_page=0x%lx, nr_pages=0x%x\n",
- priv->node, priv->chanmask,
- csrow->first_page, nr_pages);
- break;
- }
- of_node_put(np);
-}
-
-static int cell_edac_probe(struct platform_device *pdev)
-{
- struct cbe_mic_tm_regs __iomem *regs;
- struct mem_ctl_info *mci;
- struct edac_mc_layer layers[2];
- struct cell_edac_priv *priv;
- u64 reg;
- int rc, chanmask, num_chans;
-
- regs = cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(pdev->id));
- if (regs == NULL)
- return -ENODEV;
-
- edac_op_state = EDAC_OPSTATE_POLL;
-
- /* Get channel population */
- reg = in_be64(&regs->mic_mnt_cfg);
- dev_dbg(&pdev->dev, "MIC_MNT_CFG = 0x%016llx\n", reg);
- chanmask = 0;
- if (reg & CBE_MIC_MNT_CFG_CHAN_0_POP)
- chanmask |= 0x1;
- if (reg & CBE_MIC_MNT_CFG_CHAN_1_POP)
- chanmask |= 0x2;
- if (chanmask == 0) {
- dev_warn(&pdev->dev,
- "Yuck ! No channel populated ? Aborting !\n");
- return -ENODEV;
- }
- dev_dbg(&pdev->dev, "Initial FIR = 0x%016llx\n",
- in_be64(&regs->mic_fir));
-
- /* Allocate & init EDAC MC data structure */
- num_chans = chanmask == 3 ? 2 : 1;
-
- layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = 1;
- layers[0].is_virt_csrow = true;
- layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = num_chans;
- layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(pdev->id, ARRAY_SIZE(layers), layers,
- sizeof(struct cell_edac_priv));
- if (mci == NULL)
- return -ENOMEM;
- priv = mci->pvt_info;
- priv->regs = regs;
- priv->node = pdev->id;
- priv->chanmask = chanmask;
- mci->pdev = &pdev->dev;
- mci->mtype_cap = MEM_FLAG_XDR;
- mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED;
- mci->edac_cap = EDAC_FLAG_EC | EDAC_FLAG_SECDED;
- mci->mod_name = "cell_edac";
- mci->ctl_name = "MIC";
- mci->dev_name = dev_name(&pdev->dev);
- mci->edac_check = cell_edac_check;
- cell_edac_init_csrows(mci);
-
- /* Register with EDAC core */
- rc = edac_mc_add_mc(mci);
- if (rc) {
- dev_err(&pdev->dev, "failed to register with EDAC core\n");
- edac_mc_free(mci);
- return rc;
- }
-
- return 0;
-}
-
-static void cell_edac_remove(struct platform_device *pdev)
-{
- struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
- if (mci)
- edac_mc_free(mci);
-}
-
-static struct platform_driver cell_edac_driver = {
- .driver = {
- .name = "cbe-mic",
- },
- .probe = cell_edac_probe,
- .remove = cell_edac_remove,
-};
-
-static int __init cell_edac_init(void)
-{
- /* Sanity check registers data structure */
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_ecc_address_0) != 0xf8);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_ecc_address_1) != 0x1b8);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_df_config) != 0x218);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_fir) != 0x230);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_mnt_cfg) != 0x210);
- BUILD_BUG_ON(offsetof(struct cbe_mic_tm_regs,
- mic_exc) != 0x208);
-
- return platform_driver_register(&cell_edac_driver);
-}
-
-static void __exit cell_edac_exit(void)
-{
- platform_driver_unregister(&cell_edac_driver);
-}
-
-module_init(cell_edac_init);
-module_exit(cell_edac_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
-MODULE_DESCRIPTION("ECC counting for Cell MIC");
diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c
index 4804332d9946..8195fc9c9354 100644
--- a/drivers/edac/debugfs.c
+++ b/drivers/edac/debugfs.c
@@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/string_choices.h>
+
#include "edac_module.h"
static struct dentry *edac_debugfs;
@@ -22,7 +25,7 @@ static ssize_t edac_fake_inject_write(struct file *file,
"Generating %d %s fake error%s to %d.%d.%d to test core handling. NOTE: this won't test the driver-specific decoding logic.\n",
errcount,
(type == HW_EVENT_ERR_UNCORRECTED) ? "UE" : "CE",
- errcount > 1 ? "s" : "",
+ str_plural(errcount),
mci->fake_inject_layer[0],
mci->fake_inject_layer[1],
mci->fake_inject_layer[2]
diff --git a/drivers/edac/ecs.c b/drivers/edac/ecs.c
new file mode 100755
index 000000000000..51c451c7f0f0
--- /dev/null
+++ b/drivers/edac/ecs.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic ECS driver is designed to support control of on-die error
+ * check scrub (e.g., DDR5 ECS). The common sysfs ECS interface abstracts
+ * the control of various ECS functionalities into a unified set of functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+#define EDAC_ECS_FRU_NAME "ecs_fru"
+
+enum edac_ecs_attributes {
+ ECS_LOG_ENTRY_TYPE,
+ ECS_MODE,
+ ECS_RESET,
+ ECS_THRESHOLD,
+ ECS_MAX_ATTRS
+};
+
+struct edac_ecs_dev_attr {
+ struct device_attribute dev_attr;
+ int fru_id;
+};
+
+struct edac_ecs_fru_context {
+ char name[EDAC_FEAT_NAME_LEN];
+ struct edac_ecs_dev_attr dev_attr[ECS_MAX_ATTRS];
+ struct attribute *ecs_attrs[ECS_MAX_ATTRS + 1];
+ struct attribute_group group;
+};
+
+struct edac_ecs_context {
+ u16 num_media_frus;
+ struct edac_ecs_fru_context *fru_ctxs;
+};
+
+#define TO_ECS_DEV_ATTR(_dev_attr) \
+ container_of(_dev_attr, struct edac_ecs_dev_attr, dev_attr)
+
+#define EDAC_ECS_ATTR_SHOW(attrib, cb, type, format) \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \
+ dev_attr->fru_id, &data); \
+ if (ret) \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+EDAC_ECS_ATTR_SHOW(log_entry_type, get_log_entry_type, u32, "%u\n")
+EDAC_ECS_ATTR_SHOW(mode, get_mode, u32, "%u\n")
+EDAC_ECS_ATTR_SHOW(threshold, get_threshold, u32, "%u\n")
+
+#define EDAC_ECS_ATTR_STORE(attrib, cb, type, conv_func) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ struct edac_ecs_dev_attr *dev_attr = TO_ECS_DEV_ATTR(attr); \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = conv_func(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->ecs.private, \
+ dev_attr->fru_id, data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+EDAC_ECS_ATTR_STORE(log_entry_type, set_log_entry_type, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(mode, set_mode, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(reset, reset, unsigned long, kstrtoul)
+EDAC_ECS_ATTR_STORE(threshold, set_threshold, unsigned long, kstrtoul)
+
+static umode_t ecs_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+ struct device *ras_feat_dev = kobj_to_dev(kobj);
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+ const struct edac_ecs_ops *ops = ctx->ecs.ecs_ops;
+
+ switch (attr_id) {
+ case ECS_LOG_ENTRY_TYPE:
+ if (ops->get_log_entry_type) {
+ if (ops->set_log_entry_type)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case ECS_MODE:
+ if (ops->get_mode) {
+ if (ops->set_mode)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case ECS_RESET:
+ if (ops->reset)
+ return a->mode;
+ break;
+ case ECS_THRESHOLD:
+ if (ops->get_threshold) {
+ if (ops->set_threshold)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define EDAC_ECS_ATTR_RO(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RO(_name), \
+ .fru_id = _fru_id })
+
+#define EDAC_ECS_ATTR_WO(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_WO(_name), \
+ .fru_id = _fru_id })
+
+#define EDAC_ECS_ATTR_RW(_name, _fru_id) \
+ ((struct edac_ecs_dev_attr) { .dev_attr = __ATTR_RW(_name), \
+ .fru_id = _fru_id })
+
+static int ecs_create_desc(struct device *ecs_dev, const struct attribute_group **attr_groups,
+ u16 num_media_frus)
+{
+ struct edac_ecs_context *ecs_ctx;
+ u32 fru;
+
+ ecs_ctx = devm_kzalloc(ecs_dev, sizeof(*ecs_ctx), GFP_KERNEL);
+ if (!ecs_ctx)
+ return -ENOMEM;
+
+ ecs_ctx->num_media_frus = num_media_frus;
+ ecs_ctx->fru_ctxs = devm_kcalloc(ecs_dev, num_media_frus,
+ sizeof(*ecs_ctx->fru_ctxs),
+ GFP_KERNEL);
+ if (!ecs_ctx->fru_ctxs)
+ return -ENOMEM;
+
+ for (fru = 0; fru < num_media_frus; fru++) {
+ struct edac_ecs_fru_context *fru_ctx = &ecs_ctx->fru_ctxs[fru];
+ struct attribute_group *group = &fru_ctx->group;
+ int i;
+
+ fru_ctx->dev_attr[ECS_LOG_ENTRY_TYPE] = EDAC_ECS_ATTR_RW(log_entry_type, fru);
+ fru_ctx->dev_attr[ECS_MODE] = EDAC_ECS_ATTR_RW(mode, fru);
+ fru_ctx->dev_attr[ECS_RESET] = EDAC_ECS_ATTR_WO(reset, fru);
+ fru_ctx->dev_attr[ECS_THRESHOLD] = EDAC_ECS_ATTR_RW(threshold, fru);
+
+ for (i = 0; i < ECS_MAX_ATTRS; i++) {
+ sysfs_attr_init(&fru_ctx->dev_attr[i].dev_attr.attr);
+ fru_ctx->ecs_attrs[i] = &fru_ctx->dev_attr[i].dev_attr.attr;
+ }
+
+ sprintf(fru_ctx->name, "%s%d", EDAC_ECS_FRU_NAME, fru);
+ group->name = fru_ctx->name;
+ group->attrs = fru_ctx->ecs_attrs;
+ group->is_visible = ecs_attr_visible;
+
+ attr_groups[fru] = group;
+ }
+
+ return 0;
+}
+
+/**
+ * edac_ecs_get_desc - get EDAC ECS descriptors
+ * @ecs_dev: client device, supports ECS feature
+ * @attr_groups: pointer to attribute group container
+ * @num_media_frus: number of media FRUs in the device
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_ecs_get_desc(struct device *ecs_dev,
+ const struct attribute_group **attr_groups, u16 num_media_frus)
+{
+ if (!ecs_dev || !attr_groups || !num_media_frus)
+ return -EINVAL;
+
+ return ecs_create_desc(ecs_dev, attr_groups, num_media_frus);
+}
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
index 621dc2a5d034..0734909b08a4 100644
--- a/drivers/edac/edac_device.c
+++ b/drivers/edac/edac_device.c
@@ -570,3 +570,188 @@ void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
block ? block->name : "N/A", count, msg);
}
EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
+
+static void edac_dev_release(struct device *dev)
+{
+ struct edac_dev_feat_ctx *ctx = container_of(dev, struct edac_dev_feat_ctx, dev);
+
+ kfree(ctx->mem_repair);
+ kfree(ctx->scrub);
+ kfree(ctx->dev.groups);
+ kfree(ctx);
+}
+
+static const struct device_type edac_dev_type = {
+ .name = "edac_dev",
+ .release = edac_dev_release,
+};
+
+static void edac_dev_unreg(void *data)
+{
+ device_unregister(data);
+}
+
+/**
+ * edac_dev_register - register device for RAS features with EDAC
+ * @parent: parent device.
+ * @name: name for the folder in the /sys/bus/edac/devices/,
+ * which is derived from the parent device.
+ * For e.g. /sys/bus/edac/devices/cxl_mem0/
+ * @private: parent driver's data to store in the context if any.
+ * @num_features: number of RAS features to register.
+ * @ras_features: list of RAS features to register.
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ *
+ */
+int edac_dev_register(struct device *parent, char *name,
+ void *private, int num_features,
+ const struct edac_dev_feature *ras_features)
+{
+ const struct attribute_group **ras_attr_groups;
+ struct edac_dev_data *dev_data;
+ struct edac_dev_feat_ctx *ctx;
+ int mem_repair_cnt = 0;
+ int attr_gcnt = 0;
+ int ret = -ENOMEM;
+ int scrub_cnt = 0;
+ int feat;
+
+ if (!parent || !name || !num_features || !ras_features)
+ return -EINVAL;
+
+ /* Double parse to make space for attributes */
+ for (feat = 0; feat < num_features; feat++) {
+ switch (ras_features[feat].ft_type) {
+ case RAS_FEAT_SCRUB:
+ attr_gcnt++;
+ scrub_cnt++;
+ break;
+ case RAS_FEAT_ECS:
+ attr_gcnt += ras_features[feat].ecs_info.num_media_frus;
+ break;
+ case RAS_FEAT_MEM_REPAIR:
+ attr_gcnt++;
+ mem_repair_cnt++;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ras_attr_groups = kcalloc(attr_gcnt + 1, sizeof(*ras_attr_groups), GFP_KERNEL);
+ if (!ras_attr_groups)
+ goto ctx_free;
+
+ if (scrub_cnt) {
+ ctx->scrub = kcalloc(scrub_cnt, sizeof(*ctx->scrub), GFP_KERNEL);
+ if (!ctx->scrub)
+ goto groups_free;
+ }
+
+ if (mem_repair_cnt) {
+ ctx->mem_repair = kcalloc(mem_repair_cnt, sizeof(*ctx->mem_repair), GFP_KERNEL);
+ if (!ctx->mem_repair)
+ goto data_mem_free;
+ }
+
+ attr_gcnt = 0;
+ scrub_cnt = 0;
+ mem_repair_cnt = 0;
+ for (feat = 0; feat < num_features; feat++, ras_features++) {
+ switch (ras_features->ft_type) {
+ case RAS_FEAT_SCRUB:
+ if (!ras_features->scrub_ops || scrub_cnt != ras_features->instance) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->scrub[scrub_cnt];
+ dev_data->instance = scrub_cnt;
+ dev_data->scrub_ops = ras_features->scrub_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_scrub_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->instance);
+ if (ret)
+ goto data_mem_free;
+
+ scrub_cnt++;
+ attr_gcnt++;
+ break;
+ case RAS_FEAT_ECS:
+ if (!ras_features->ecs_ops) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->ecs;
+ dev_data->ecs_ops = ras_features->ecs_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_ecs_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->ecs_info.num_media_frus);
+ if (ret)
+ goto data_mem_free;
+
+ attr_gcnt += ras_features->ecs_info.num_media_frus;
+ break;
+ case RAS_FEAT_MEM_REPAIR:
+ if (!ras_features->mem_repair_ops ||
+ mem_repair_cnt != ras_features->instance) {
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+
+ dev_data = &ctx->mem_repair[mem_repair_cnt];
+ dev_data->instance = mem_repair_cnt;
+ dev_data->mem_repair_ops = ras_features->mem_repair_ops;
+ dev_data->private = ras_features->ctx;
+ ret = edac_mem_repair_get_desc(parent, &ras_attr_groups[attr_gcnt],
+ ras_features->instance);
+ if (ret)
+ goto data_mem_free;
+
+ mem_repair_cnt++;
+ attr_gcnt++;
+ break;
+ default:
+ ret = -EINVAL;
+ goto data_mem_free;
+ }
+ }
+
+ ctx->dev.parent = parent;
+ ctx->dev.bus = edac_get_sysfs_subsys();
+ ctx->dev.type = &edac_dev_type;
+ ctx->dev.groups = ras_attr_groups;
+ ctx->private = private;
+ dev_set_drvdata(&ctx->dev, ctx);
+
+ ret = dev_set_name(&ctx->dev, "%s", name);
+ if (ret)
+ goto data_mem_free;
+
+ ret = device_register(&ctx->dev);
+ if (ret) {
+ put_device(&ctx->dev);
+ return ret;
+ }
+
+ return devm_add_action_or_reset(parent, edac_dev_unreg, &ctx->dev);
+
+data_mem_free:
+ kfree(ctx->mem_repair);
+ kfree(ctx->scrub);
+groups_free:
+ kfree(ras_attr_groups);
+ctx_free:
+ kfree(ctx);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(edac_dev_register);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index d6eed727b0cd..0959320fe51c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -214,7 +214,7 @@ static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
unsigned int row, chn;
/*
- * Alocate and fill the csrow/channels structs
+ * Allocate and fill the csrow/channels structs
*/
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
if (!mci->csrows)
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 4200aec04831..0f338adf7d93 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -422,7 +422,7 @@ static inline int nr_pages_per_csrow(struct csrow_info *csrow)
return nr_pages;
}
-/* Create a CSROW object under specifed edac_mc_device */
+/* Create a CSROW object under specified edac_mc_device */
static int edac_create_csrow_object(struct mem_ctl_info *mci,
struct csrow_info *csrow, int index)
{
@@ -449,7 +449,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
return 0;
}
-/* Create a CSROW object under specifed edac_mc_device */
+/* Create a CSROW object under specified edac_mc_device */
static int edac_create_csrow_objects(struct mem_ctl_info *mci)
{
int err, i;
@@ -636,7 +636,7 @@ static void dimm_release(struct device *dev)
*/
}
-/* Create a DIMM object under specifed memory controller device */
+/* Create a DIMM object under specified memory controller device */
static int edac_create_dimm_object(struct mem_ctl_info *mci,
struct dimm_info *dimm)
{
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 51556c72a967..355b527d839e 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -95,7 +95,7 @@ static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64,
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
-static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
+static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl,
u32 *offsets_scrub, u32 *offsets_demand,
u32 *offsets_demand2)
{
@@ -108,10 +108,10 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
if (enable) {
/* Save default configurations */
- imc->chan[chan].retry_rd_err_log_s = s;
- imc->chan[chan].retry_rd_err_log_d = d;
+ rrl_ctl[0] = s;
+ rrl_ctl[1] = d;
if (offsets_demand2)
- imc->chan[chan].retry_rd_err_log_d2 = d2;
+ rrl_ctl[2] = d2;
s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
s |= RETRY_RD_ERR_LOG_EN;
@@ -125,25 +125,25 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
}
} else {
/* Restore default configurations */
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
+ if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC)
s |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
+ if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER)
s |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
+ if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN))
s &= ~RETRY_RD_ERR_LOG_EN;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
+ if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC)
d |= RETRY_RD_ERR_LOG_UC;
- if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
+ if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER)
d |= RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
+ if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN))
d &= ~RETRY_RD_ERR_LOG_EN;
if (offsets_demand2) {
- if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
+ if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC)
d2 |= RETRY_RD_ERR_LOG_UC;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
+ if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER))
d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
- if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
+ if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN))
d2 &= ~RETRY_RD_ERR_LOG_EN;
}
}
@@ -157,6 +157,7 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
static void enable_retry_rd_err_log(bool enable)
{
int i, j, imc_num, chan_num;
+ struct skx_channel *chan;
struct skx_imc *imc;
struct skx_dev *d;
@@ -171,8 +172,9 @@ static void enable_retry_rd_err_log(bool enable)
if (!imc->mbase)
continue;
+ chan = d->imc[i].chan;
for (j = 0; j < chan_num; j++)
- __enable_retry_rd_err_log(imc, j, enable,
+ __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
res_cfg->offsets_scrub,
res_cfg->offsets_demand,
res_cfg->offsets_demand2);
@@ -186,12 +188,13 @@ static void enable_retry_rd_err_log(bool enable)
if (!imc->mbase || !imc->hbm_mc)
continue;
+ chan = d->imc[i].chan;
for (j = 0; j < chan_num; j++) {
- __enable_retry_rd_err_log(imc, j, enable,
+ __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0],
res_cfg->offsets_scrub_hbm0,
res_cfg->offsets_demand_hbm0,
NULL);
- __enable_retry_rd_err_log(imc, j, enable,
+ __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1],
res_cfg->offsets_scrub_hbm1,
res_cfg->offsets_demand_hbm1,
NULL);
@@ -751,6 +754,8 @@ static int i10nm_get_ddr_munits(void)
continue;
} else {
d->imc[lmc].mdev = mdev;
+ if (res_cfg->type == SPR)
+ skx_set_mc_mapping(d, i, lmc);
lmc++;
}
}
@@ -938,16 +943,18 @@ static struct res_config gnr_cfg = {
};
static const struct x86_cpu_id i10nm_cpuids[] = {
- X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
- X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
- X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
- X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
- X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_D, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
- X86_MATCH_VFM_STEPPINGS(INTEL_SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
- X86_MATCH_VFM_STEPPINGS(INTEL_EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg),
- X86_MATCH_VFM_STEPPINGS(INTEL_GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
- X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
- X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT, X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
+ X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, X86_STEP_MIN, 0x3, &i10nm_cfg0),
+ X86_MATCH_VFM_STEPS(INTEL_ATOM_TREMONT_D, 0x4, X86_STEP_MAX, &i10nm_cfg1),
+ X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, X86_STEP_MIN, 0x3, &i10nm_cfg0),
+ X86_MATCH_VFM_STEPS(INTEL_ICELAKE_X, 0x4, X86_STEP_MAX, &i10nm_cfg1),
+ X86_MATCH_VFM( INTEL_ICELAKE_D, &i10nm_cfg1),
+
+ X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_cfg),
+ X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_cfg),
+ X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_cfg),
+ X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_cfg),
{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -1010,7 +1017,7 @@ static struct notifier_block i10nm_mce_dec = {
static int __init i10nm_init(void)
{
- u8 mc = 0, src_id = 0, node_id = 0;
+ u8 mc = 0, src_id = 0;
const struct x86_cpu_id *id;
struct res_config *cfg;
const char *owner;
@@ -1070,19 +1077,14 @@ static int __init i10nm_init(void)
if (rc < 0)
goto fail;
- rc = skx_get_node_id(d, &node_id);
- if (rc < 0)
- goto fail;
-
- edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+ edac_dbg(2, "src_id = %d\n", src_id);
for (i = 0; i < imc_num; i++) {
if (!d->imc[i].mdev)
continue;
d->imc[i].mc = mc++;
d->imc[i].lmc = i;
- d->imc[i].src_id = src_id;
- d->imc[i].node_id = node_id;
+ d->imc[i].src_id = src_id;
if (d->imc[i].hbm_mc) {
d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
d->imc[i].num_channels = cfg->hbm_chan_num;
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 4b5a71f8739d..4a1bebc1ff14 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -338,11 +338,11 @@ struct i5000_pvt {
u16 mir0, mir1, mir2;
- u16 b0_mtr[NUM_MTRS]; /* Memory Technlogy Reg */
+ u16 b0_mtr[NUM_MTRS]; /* Memory Technology Reg */
u16 b0_ambpresent0; /* Branch 0, Channel 0 */
- u16 b0_ambpresent1; /* Brnach 0, Channel 1 */
+ u16 b0_ambpresent1; /* Branch 0, Channel 1 */
- u16 b1_mtr[NUM_MTRS]; /* Memory Technlogy Reg */
+ u16 b1_mtr[NUM_MTRS]; /* Memory Technology Reg */
u16 b1_ambpresent0; /* Branch 1, Channel 8 */
u16 b1_ambpresent1; /* Branch 1, Channel 1 */
@@ -1210,7 +1210,7 @@ static void i5000_get_mc_regs(struct mem_ctl_info *mci)
&pvt->b0_ambpresent1);
edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
- /* Only if we have 2 branchs (4 channels) */
+ /* Only if we have 2 branches (4 channels) */
if (pvt->maxch < CHANNELS_PER_BRANCH) {
pvt->b1_ambpresent0 = 0;
pvt->b1_ambpresent1 = 0;
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 49b4499269fb..b5cf25905b05 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -899,7 +900,7 @@ static void decode_mtr(int slot_row, u16 mtr)
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+ str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 61adaa872ba7..69068f8d0cad 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -620,7 +621,7 @@ static int decode_mtr(struct i7300_pvt *pvt,
edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
- MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
+ str_enabled_disabled(MTR_DIMMS_ETHROTTLE(mtr)));
edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
edac_dbg(2, "\t\tNUMRANK: %s\n",
@@ -871,9 +872,9 @@ static int i7300_get_mc_regs(struct mem_ctl_info *mci)
IS_MIRRORED(pvt->mc_settings) ? "" : "non-");
edac_dbg(0, "Error detection is %s\n",
- IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ str_enabled_disabled(IS_ECC_ENABLED(pvt->mc_settings)));
edac_dbg(0, "Retry is %s\n",
- IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
+ str_enabled_disabled(IS_RETRY_ENABLED(pvt->mc_settings)));
/* Get Memory Interleave Range registers */
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 4fc16922dc1a..204834149579 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -51,6 +51,7 @@
#include <linux/edac.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <asm/mce.h>
#include "edac_module.h"
#define EDAC_MOD_STR "ie31200_edac"
@@ -84,44 +85,23 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9 0x3ec6
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10 0x3eca
-/* Test if HB is for Skylake or later. */
-#define DEVICE_ID_SKYLAKE_OR_LATER(did) \
- (((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \
- ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_12) || \
- (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \
- PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK))
-
-#define IE31200_DIMMS 4
-#define IE31200_RANKS 8
-#define IE31200_RANKS_PER_CHANNEL 4
+/* Raptor Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
+
+#define IE31200_RANKS_PER_CHANNEL 8
#define IE31200_DIMMS_PER_CHANNEL 2
#define IE31200_CHANNELS 2
+#define IE31200_IMC_NUM 2
/* Intel IE31200 register addresses - device 0 function 0 - DRAM Controller */
#define IE31200_MCHBAR_LOW 0x48
#define IE31200_MCHBAR_HIGH 0x4c
-#define IE31200_MCHBAR_MASK GENMASK_ULL(38, 15)
-#define IE31200_MMR_WINDOW_SIZE BIT(15)
/*
* Error Status Register (16b)
*
- * 15 reserved
- * 14 Isochronous TBWRR Run Behind FIFO Full
- * (ITCV)
- * 13 Isochronous TBWRR Run Behind FIFO Put
- * (ITSTV)
- * 12 reserved
- * 11 MCH Thermal Sensor Event
- * for SMI/SCI/SERR (GTSE)
- * 10 reserved
- * 9 LOCK to non-DRAM Memory Flag (LCKF)
- * 8 reserved
- * 7 DRAM Throttle Flag (DTF)
- * 6:2 reserved
* 1 Multi-bit DRAM ECC Error Flag (DMERR)
* 0 Single-bit DRAM ECC Error Flag (DSERR)
*/
@@ -130,68 +110,60 @@
#define IE31200_ERRSTS_CE BIT(0)
#define IE31200_ERRSTS_BITS (IE31200_ERRSTS_UE | IE31200_ERRSTS_CE)
-/*
- * Channel 0 ECC Error Log (64b)
- *
- * 63:48 Error Column Address (ERRCOL)
- * 47:32 Error Row Address (ERRROW)
- * 31:29 Error Bank Address (ERRBANK)
- * 28:27 Error Rank Address (ERRRANK)
- * 26:24 reserved
- * 23:16 Error Syndrome (ERRSYND)
- * 15: 2 reserved
- * 1 Multiple Bit Error Status (MERRSTS)
- * 0 Correctable Error Status (CERRSTS)
- */
-
-#define IE31200_C0ECCERRLOG 0x40c8
-#define IE31200_C1ECCERRLOG 0x44c8
-#define IE31200_C0ECCERRLOG_SKL 0x4048
-#define IE31200_C1ECCERRLOG_SKL 0x4448
-#define IE31200_ECCERRLOG_CE BIT(0)
-#define IE31200_ECCERRLOG_UE BIT(1)
-#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
-#define IE31200_ECCERRLOG_RANK_SHIFT 27
-#define IE31200_ECCERRLOG_SYNDROME_BITS GENMASK_ULL(23, 16)
-#define IE31200_ECCERRLOG_SYNDROME_SHIFT 16
-
-#define IE31200_ECCERRLOG_SYNDROME(log) \
- ((log & IE31200_ECCERRLOG_SYNDROME_BITS) >> \
- IE31200_ECCERRLOG_SYNDROME_SHIFT)
-
#define IE31200_CAPID0 0xe4
#define IE31200_CAPID0_PDCD BIT(4)
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
-#define IE31200_MAD_DIMM_0_OFFSET 0x5004
-#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C
-#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK BIT(17)
-#define IE31200_MAD_DIMM_A_RANK_SHIFT 17
-#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10)
-#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10
-#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
-#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19
-#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8)
-#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8
-
-/* Skylake reports 1GB increments, everything else is 256MB */
-#define IE31200_PAGES(n, skl) \
- (n << (28 + (2 * skl) - PAGE_SHIFT))
+/* Non-constant mask variant of FIELD_GET() */
+#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1))
static int nr_channels;
static struct pci_dev *mci_pdev;
static int ie31200_registered = 1;
+struct res_config {
+ enum mem_type mtype;
+ bool cmci;
+ int imc_num;
+ /* Host MMIO configuration register */
+ u64 reg_mchbar_mask;
+ u64 reg_mchbar_window_size;
+ /* ECC error log register */
+ u64 reg_eccerrlog_offset[IE31200_CHANNELS];
+ u64 reg_eccerrlog_ce_mask;
+ u64 reg_eccerrlog_ce_ovfl_mask;
+ u64 reg_eccerrlog_ue_mask;
+ u64 reg_eccerrlog_ue_ovfl_mask;
+ u64 reg_eccerrlog_rank_mask;
+ u64 reg_eccerrlog_syndrome_mask;
+ /* MSR to clear ECC error log register */
+ u32 msr_clear_eccerrlog_offset;
+ /* DIMM characteristics register */
+ u64 reg_mad_dimm_size_granularity;
+ u64 reg_mad_dimm_offset[IE31200_CHANNELS];
+ u32 reg_mad_dimm_size_mask[IE31200_DIMMS_PER_CHANNEL];
+ u32 reg_mad_dimm_rank_mask[IE31200_DIMMS_PER_CHANNEL];
+ u32 reg_mad_dimm_width_mask[IE31200_DIMMS_PER_CHANNEL];
+};
+
struct ie31200_priv {
void __iomem *window;
void __iomem *c0errlog;
void __iomem *c1errlog;
+ struct res_config *cfg;
+ struct mem_ctl_info *mci;
+ struct pci_dev *pdev;
+ struct device dev;
};
+static struct ie31200_pvt {
+ struct ie31200_priv *priv[IE31200_IMC_NUM];
+} ie31200_pvt;
+
enum ie31200_chips {
IE31200 = 0,
+ IE31200_1 = 1,
};
struct ie31200_dev_info {
@@ -202,18 +174,22 @@ struct ie31200_error_info {
u16 errsts;
u16 errsts2;
u64 eccerrlog[IE31200_CHANNELS];
+ u64 erraddr;
};
static const struct ie31200_dev_info ie31200_devs[] = {
[IE31200] = {
.ctl_name = "IE31200"
},
+ [IE31200_1] = {
+ .ctl_name = "IE31200_1"
+ },
};
struct dimm_data {
- u8 size; /* in multiples of 256MB, except Skylake is 1GB */
- u8 dual_rank : 1,
- x16_width : 2; /* 0 means x8 width */
+ u64 size; /* in bytes */
+ u8 ranks;
+ enum dev_type dtype;
};
static int how_many_channels(struct pci_dev *pdev)
@@ -251,29 +227,54 @@ static bool ecc_capable(struct pci_dev *pdev)
return true;
}
-static int eccerrlog_row(u64 log)
-{
- return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
- IE31200_ECCERRLOG_RANK_SHIFT);
-}
+#define mci_to_pci_dev(mci) (((struct ie31200_priv *)(mci)->pvt_info)->pdev)
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
{
+ struct ie31200_priv *priv = mci->pvt_info;
+ struct res_config *cfg = priv->cfg;
+
+ /*
+ * The PCI ERRSTS register is deprecated. Write the MSR to clear
+ * the ECC error log registers in all memory controllers.
+ */
+ if (cfg->msr_clear_eccerrlog_offset) {
+ if (wrmsr_safe(cfg->msr_clear_eccerrlog_offset,
+ cfg->reg_eccerrlog_ce_mask |
+ cfg->reg_eccerrlog_ce_ovfl_mask |
+ cfg->reg_eccerrlog_ue_mask |
+ cfg->reg_eccerrlog_ue_ovfl_mask, 0) < 0)
+ ie31200_printk(KERN_ERR, "Failed to wrmsr.\n");
+
+ return;
+ }
+
/*
* Clear any error bits.
* (Yes, we really clear bits by writing 1 to them.)
*/
- pci_write_bits16(to_pci_dev(mci->pdev), IE31200_ERRSTS,
+ pci_write_bits16(mci_to_pci_dev(mci), IE31200_ERRSTS,
IE31200_ERRSTS_BITS, IE31200_ERRSTS_BITS);
}
static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = mci_to_pci_dev(mci);
struct ie31200_priv *priv = mci->pvt_info;
- pdev = to_pci_dev(mci->pdev);
+ /*
+ * The PCI ERRSTS register is deprecated, directly read the
+ * MMIO-mapped ECC error log registers.
+ */
+ if (priv->cfg->msr_clear_eccerrlog_offset) {
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
+ if (nr_channels == 2)
+ info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
+
+ ie31200_clear_error_info(mci);
+ return;
+ }
/*
* This is a mess because there is no atomic way to read all the
@@ -309,46 +310,56 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
static void ie31200_process_error_info(struct mem_ctl_info *mci,
struct ie31200_error_info *info)
{
+ struct ie31200_priv *priv = mci->pvt_info;
+ struct res_config *cfg = priv->cfg;
int channel;
u64 log;
- if (!(info->errsts & IE31200_ERRSTS_BITS))
- return;
+ if (!cfg->msr_clear_eccerrlog_offset) {
+ if (!(info->errsts & IE31200_ERRSTS_BITS))
+ return;
- if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
- -1, -1, -1, "UE overwrote CE", "");
- info->errsts = info->errsts2;
+ if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
+ -1, -1, -1, "UE overwrote CE", "");
+ info->errsts = info->errsts2;
+ }
}
for (channel = 0; channel < nr_channels; channel++) {
log = info->eccerrlog[channel];
- if (log & IE31200_ECCERRLOG_UE) {
+ if (log & cfg->reg_eccerrlog_ue_mask) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
- 0, 0, 0,
- eccerrlog_row(log),
+ info->erraddr >> PAGE_SHIFT, 0, 0,
+ field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 UE", "");
- } else if (log & IE31200_ECCERRLOG_CE) {
+ } else if (log & cfg->reg_eccerrlog_ce_mask) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
- 0, 0,
- IE31200_ECCERRLOG_SYNDROME(log),
- eccerrlog_row(log),
+ info->erraddr >> PAGE_SHIFT, 0,
+ field_get(cfg->reg_eccerrlog_syndrome_mask, log),
+ field_get(cfg->reg_eccerrlog_rank_mask, log),
channel, -1,
"ie31200 CE", "");
}
}
}
-static void ie31200_check(struct mem_ctl_info *mci)
+static void __ie31200_check(struct mem_ctl_info *mci, struct mce *mce)
{
struct ie31200_error_info info;
+ info.erraddr = mce ? mce->addr : 0;
ie31200_get_and_clear_error_info(mci, &info);
ie31200_process_error_info(mci, &info);
}
-static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
+static void ie31200_check(struct mem_ctl_info *mci)
+{
+ __ie31200_check(mci, NULL);
+}
+
+static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
union {
u64 mchbar;
@@ -361,7 +372,8 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
pci_read_config_dword(pdev, IE31200_MCHBAR_LOW, &u.mchbar_low);
pci_read_config_dword(pdev, IE31200_MCHBAR_HIGH, &u.mchbar_high);
- u.mchbar &= IE31200_MCHBAR_MASK;
+ u.mchbar &= cfg->reg_mchbar_mask;
+ u.mchbar += cfg->reg_mchbar_window_size * mc;
if (u.mchbar != (resource_size_t)u.mchbar) {
ie31200_printk(KERN_ERR, "mmio space beyond accessible range (0x%llx)\n",
@@ -369,7 +381,7 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return NULL;
}
- window = ioremap(u.mchbar, IE31200_MMR_WINDOW_SIZE);
+ window = ioremap(u.mchbar, cfg->reg_mchbar_window_size);
if (!window)
ie31200_printk(KERN_ERR, "Cannot map mmio space at 0x%llx\n",
(unsigned long long)u.mchbar);
@@ -377,155 +389,108 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return window;
}
-static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
- int chan)
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int dimm,
+ struct res_config *cfg)
{
- dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
- dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
- dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
- (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+ dd->size = field_get(cfg->reg_mad_dimm_size_mask[dimm], addr_decode) * cfg->reg_mad_dimm_size_granularity;
+ dd->ranks = field_get(cfg->reg_mad_dimm_rank_mask[dimm], addr_decode) + 1;
+ dd->dtype = field_get(cfg->reg_mad_dimm_width_mask[dimm], addr_decode) + DEV_X8;
}
-static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
- int chan)
+static void ie31200_get_dimm_config(struct mem_ctl_info *mci, void __iomem *window,
+ struct res_config *cfg, int mc)
{
- dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
- dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
- dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
-}
+ struct dimm_data dimm_info;
+ struct dimm_info *dimm;
+ unsigned long nr_pages;
+ u32 addr_decode;
+ int i, j, k;
-static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
- bool skl)
-{
- if (skl)
- __skl_populate_dimm_info(dd, addr_decode, chan);
- else
- __populate_dimm_info(dd, addr_decode, chan);
-}
+ for (i = 0; i < IE31200_CHANNELS; i++) {
+ addr_decode = readl(window + cfg->reg_mad_dimm_offset[i]);
+ edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
+ for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
+ populate_dimm_info(&dimm_info, addr_decode, j, cfg);
+ edac_dbg(0, "mc: %d, channel: %d, dimm: %d, size: %lld MiB, ranks: %d, DRAM chip type: %d\n",
+ mc, i, j, dimm_info.size >> 20,
+ dimm_info.ranks,
+ dimm_info.dtype);
-static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
+ nr_pages = MiB_TO_PAGES(dimm_info.size >> 20);
+ if (nr_pages == 0)
+ continue;
+
+ nr_pages = nr_pages / dimm_info.ranks;
+ for (k = 0; k < dimm_info.ranks; k++) {
+ dimm = edac_get_dimm(mci, (j * dimm_info.ranks) + k, i, 0);
+ dimm->nr_pages = nr_pages;
+ edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
+ dimm->grain = 8; /* just a guess */
+ dimm->mtype = cfg->mtype;
+ dimm->dtype = dimm_info.dtype;
+ dimm->edac_mode = EDAC_UNKNOWN;
+ }
+ }
+ }
+}
+
+static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, int mc)
{
- int i, j, ret;
- struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
- struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
- void __iomem *window;
struct ie31200_priv *priv;
- u32 addr_decode, mad_offset;
-
- /*
- * Kaby Lake, Coffee Lake seem to work like Skylake. Please re-visit
- * this logic when adding new CPU support.
- */
- bool skl = DEVICE_ID_SKYLAKE_OR_LATER(pdev->device);
-
- edac_dbg(0, "MC:\n");
-
- if (!ecc_capable(pdev)) {
- ie31200_printk(KERN_INFO, "No ECC support\n");
- return -ENODEV;
- }
+ struct mem_ctl_info *mci;
+ void __iomem *window;
+ int ret;
nr_channels = how_many_channels(pdev);
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = IE31200_DIMMS;
+ layers[0].size = IE31200_RANKS_PER_CHANNEL;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = nr_channels;
layers[1].is_virt_csrow = false;
- mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers,
sizeof(struct ie31200_priv));
if (!mci)
return -ENOMEM;
- window = ie31200_map_mchbar(pdev);
+ window = ie31200_map_mchbar(pdev, cfg, mc);
if (!window) {
ret = -ENODEV;
goto fail_free;
}
edac_dbg(3, "MC: init mci\n");
- mci->pdev = &pdev->dev;
- if (skl)
- mci->mtype_cap = MEM_FLAG_DDR4;
- else
- mci->mtype_cap = MEM_FLAG_DDR3;
+ mci->mtype_cap = BIT(cfg->mtype);
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = ie31200_devs[dev_idx].ctl_name;
+ mci->ctl_name = ie31200_devs[mc].ctl_name;
mci->dev_name = pci_name(pdev);
- mci->edac_check = ie31200_check;
+ mci->edac_check = cfg->cmci ? NULL : ie31200_check;
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
- if (skl) {
- priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
- priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
- mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
- } else {
- priv->c0errlog = window + IE31200_C0ECCERRLOG;
- priv->c1errlog = window + IE31200_C1ECCERRLOG;
- mad_offset = IE31200_MAD_DIMM_0_OFFSET;
- }
-
- /* populate DIMM info */
- for (i = 0; i < IE31200_CHANNELS; i++) {
- addr_decode = readl(window + mad_offset +
- (i * 4));
- edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
- for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
- populate_dimm_info(&dimm_info[i][j], addr_decode, j,
- skl);
- edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
- dimm_info[i][j].size,
- dimm_info[i][j].dual_rank,
- dimm_info[i][j].x16_width);
- }
- }
-
+ priv->c0errlog = window + cfg->reg_eccerrlog_offset[0];
+ priv->c1errlog = window + cfg->reg_eccerrlog_offset[1];
+ priv->cfg = cfg;
+ priv->mci = mci;
+ priv->pdev = pdev;
+ device_initialize(&priv->dev);
/*
- * The dram rank boundary (DRB) reg values are boundary addresses
- * for each DRAM rank with a granularity of 64MB. DRB regs are
- * cumulative; the last one will contain the total memory
- * contained in all ranks.
+ * The EDAC core uses mci->pdev (pointer to the structure device)
+ * as the memory controller ID. The SoCs attach one or more memory
+ * controllers to a single pci_dev (a single pci_dev->dev can
+ * correspond to multiple memory controllers).
+ *
+ * To make mci->pdev unique, assign pci_dev->dev to mci->pdev
+ * for the first memory controller and assign a unique priv->dev
+ * to mci->pdev for each additional memory controller.
*/
- for (i = 0; i < IE31200_DIMMS_PER_CHANNEL; i++) {
- for (j = 0; j < IE31200_CHANNELS; j++) {
- struct dimm_info *dimm;
- unsigned long nr_pages;
-
- nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
- if (nr_pages == 0)
- continue;
-
- if (dimm_info[j][i].dual_rank) {
- nr_pages = nr_pages / 2;
- dimm = edac_get_dimm(mci, (i * 2) + 1, j, 0);
- dimm->nr_pages = nr_pages;
- edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
- dimm->grain = 8; /* just a guess */
- if (skl)
- dimm->mtype = MEM_DDR4;
- else
- dimm->mtype = MEM_DDR3;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_UNKNOWN;
- }
- dimm = edac_get_dimm(mci, i * 2, j, 0);
- dimm->nr_pages = nr_pages;
- edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
- dimm->grain = 8; /* same guess */
- if (skl)
- dimm->mtype = MEM_DDR4;
- else
- dimm->mtype = MEM_DDR3;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_UNKNOWN;
- }
- }
+ mci->pdev = mc ? &priv->dev : &pdev->dev;
+ ie31200_get_dimm_config(mci, window, cfg, mc);
ie31200_clear_error_info(mci);
if (edac_mc_add_mc(mci)) {
@@ -534,16 +499,115 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
goto fail_unmap;
}
- /* get this far and it's successful */
- edac_dbg(3, "MC: success\n");
+ ie31200_pvt.priv[mc] = priv;
return 0;
-
fail_unmap:
iounmap(window);
-
fail_free:
edac_mc_free(mci);
+ return ret;
+}
+
+static void mce_check(struct mce *mce)
+{
+ struct ie31200_priv *priv;
+ int i;
+
+ for (i = 0; i < IE31200_IMC_NUM; i++) {
+ priv = ie31200_pvt.priv[i];
+ if (!priv)
+ continue;
+
+ __ie31200_check(priv->mci, mce);
+ }
+}
+
+static int mce_handler(struct notifier_block *nb, unsigned long val, void *data)
+{
+ struct mce *mce = (struct mce *)data;
+ char *type;
+
+ if (mce->kflags & MCE_HANDLED_CEC)
+ return NOTIFY_DONE;
+
+ /*
+ * Ignore unless this is a memory related error.
+ * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs.
+ */
+ if ((mce->status & 0xefff) >> 7 != 1)
+ return NOTIFY_DONE;
+
+ type = mce->mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event";
+
+ edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
+ mce->extcpu, type, mce->mcgstatus,
+ mce->bank, mce->status);
+ edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
+ edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
+ edac_dbg(0, "MISC 0x%llx\n", mce->misc);
+ edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
+ mce->cpuvendor, mce->cpuid, mce->time,
+ mce->socketid, mce->apicid);
+
+ mce_check(mce);
+ mce->kflags |= MCE_HANDLED_EDAC;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ie31200_mce_dec = {
+ .notifier_call = mce_handler,
+ .priority = MCE_PRIO_EDAC,
+};
+
+static void ie31200_unregister_mcis(void)
+{
+ struct ie31200_priv *priv;
+ struct mem_ctl_info *mci;
+ int i;
+
+ for (i = 0; i < IE31200_IMC_NUM; i++) {
+ priv = ie31200_pvt.priv[i];
+ if (!priv)
+ continue;
+ mci = priv->mci;
+ edac_mc_del_mc(mci->pdev);
+ iounmap(priv->window);
+ edac_mc_free(mci);
+ }
+}
+
+static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
+{
+ int i, ret;
+
+ edac_dbg(0, "MC:\n");
+
+ if (!ecc_capable(pdev)) {
+ ie31200_printk(KERN_INFO, "No ECC support\n");
+ return -ENODEV;
+ }
+
+ for (i = 0; i < cfg->imc_num; i++) {
+ ret = ie31200_register_mci(pdev, cfg, i);
+ if (ret)
+ goto fail_register;
+ }
+
+ if (cfg->cmci) {
+ mce_register_decode_chain(&ie31200_mce_dec);
+ edac_op_state = EDAC_OPSTATE_INT;
+ } else {
+ edac_op_state = EDAC_OPSTATE_POLL;
+ }
+
+ /* get this far and it's successful. */
+ edac_dbg(3, "MC: success\n");
+ return 0;
+
+fail_register:
+ ie31200_unregister_mcis();
return ret;
}
@@ -555,7 +619,7 @@ static int ie31200_init_one(struct pci_dev *pdev,
edac_dbg(0, "MC:\n");
if (pci_enable_device(pdev) < 0)
return -EIO;
- rc = ie31200_probe1(pdev, ent->driver_data);
+ rc = ie31200_probe1(pdev, (struct res_config *)ent->driver_data);
if (rc == 0 && !mci_pdev)
mci_pdev = pci_dev_get(pdev);
@@ -564,43 +628,112 @@ static int ie31200_init_one(struct pci_dev *pdev,
static void ie31200_remove_one(struct pci_dev *pdev)
{
- struct mem_ctl_info *mci;
- struct ie31200_priv *priv;
+ struct ie31200_priv *priv = ie31200_pvt.priv[0];
edac_dbg(0, "\n");
pci_dev_put(mci_pdev);
mci_pdev = NULL;
- mci = edac_mc_del_mc(&pdev->dev);
- if (!mci)
- return;
- priv = mci->pvt_info;
- iounmap(priv->window);
- edac_mc_free(mci);
+ if (priv->cfg->cmci)
+ mce_unregister_decode_chain(&ie31200_mce_dec);
+ ie31200_unregister_mcis();
}
+static struct res_config snb_cfg = {
+ .mtype = MEM_DDR3,
+ .imc_num = 1,
+ .reg_mchbar_mask = GENMASK_ULL(38, 15),
+ .reg_mchbar_window_size = BIT_ULL(15),
+ .reg_eccerrlog_offset[0] = 0x40c8,
+ .reg_eccerrlog_offset[1] = 0x44c8,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ue_mask = BIT_ULL(1),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .reg_mad_dimm_size_granularity = BIT_ULL(28),
+ .reg_mad_dimm_offset[0] = 0x5004,
+ .reg_mad_dimm_offset[1] = 0x5008,
+ .reg_mad_dimm_size_mask[0] = GENMASK(7, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(15, 8),
+ .reg_mad_dimm_rank_mask[0] = BIT(17),
+ .reg_mad_dimm_rank_mask[1] = BIT(18),
+ .reg_mad_dimm_width_mask[0] = BIT(19),
+ .reg_mad_dimm_width_mask[1] = BIT(20),
+};
+
+static struct res_config skl_cfg = {
+ .mtype = MEM_DDR4,
+ .imc_num = 1,
+ .reg_mchbar_mask = GENMASK_ULL(38, 15),
+ .reg_mchbar_window_size = BIT_ULL(15),
+ .reg_eccerrlog_offset[0] = 0x4048,
+ .reg_eccerrlog_offset[1] = 0x4448,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ue_mask = BIT_ULL(1),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .reg_mad_dimm_size_granularity = BIT_ULL(30),
+ .reg_mad_dimm_offset[0] = 0x500c,
+ .reg_mad_dimm_offset[1] = 0x5010,
+ .reg_mad_dimm_size_mask[0] = GENMASK(5, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(21, 16),
+ .reg_mad_dimm_rank_mask[0] = BIT(10),
+ .reg_mad_dimm_rank_mask[1] = BIT(26),
+ .reg_mad_dimm_width_mask[0] = GENMASK(9, 8),
+ .reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
+struct res_config rpl_s_cfg = {
+ .mtype = MEM_DDR5,
+ .cmci = true,
+ .imc_num = 2,
+ .reg_mchbar_mask = GENMASK_ULL(41, 17),
+ .reg_mchbar_window_size = BIT_ULL(16),
+ .reg_eccerrlog_offset[0] = 0xe048,
+ .reg_eccerrlog_offset[1] = 0xe848,
+ .reg_eccerrlog_ce_mask = BIT_ULL(0),
+ .reg_eccerrlog_ce_ovfl_mask = BIT_ULL(1),
+ .reg_eccerrlog_ue_mask = BIT_ULL(2),
+ .reg_eccerrlog_ue_ovfl_mask = BIT_ULL(3),
+ .reg_eccerrlog_rank_mask = GENMASK_ULL(28, 27),
+ .reg_eccerrlog_syndrome_mask = GENMASK_ULL(23, 16),
+ .msr_clear_eccerrlog_offset = 0x791,
+ .reg_mad_dimm_offset[0] = 0xd80c,
+ .reg_mad_dimm_offset[1] = 0xd810,
+ .reg_mad_dimm_size_granularity = BIT_ULL(29),
+ .reg_mad_dimm_size_mask[0] = GENMASK(6, 0),
+ .reg_mad_dimm_size_mask[1] = GENMASK(22, 16),
+ .reg_mad_dimm_rank_mask[0] = GENMASK(10, 9),
+ .reg_mad_dimm_rank_mask[1] = GENMASK(27, 26),
+ .reg_mad_dimm_width_mask[0] = GENMASK(8, 7),
+ .reg_mad_dimm_width_mask[1] = GENMASK(25, 24),
+};
+
static const struct pci_device_id ie31200_pci_tbl[] = {
- { PCI_VEND_DEV(INTEL, IE31200_HB_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_12), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_4), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_5), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_6), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
- { PCI_VEND_DEV(INTEL, IE31200_HB_CFL_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_1), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_2), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_3), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_4), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_5), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_6), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_7), (kernel_ulong_t)&snb_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_8), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_9), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_10), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_11), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_12), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_1), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_2), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_3), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_4), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_5), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_6), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_7), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_8), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_9), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_10), (kernel_ulong_t)&skl_cfg },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
{ 0, } /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
@@ -617,12 +750,10 @@ static int __init ie31200_init(void)
int pci_rc, i;
edac_dbg(3, "MC:\n");
- /* Ensure that the OPSTATE is set correctly for POLL or NMI */
- opstate_init();
pci_rc = pci_register_driver(&ie31200_driver);
if (pci_rc < 0)
- goto fail0;
+ return pci_rc;
if (!mci_pdev) {
ie31200_registered = 0;
@@ -633,11 +764,13 @@ static int __init ie31200_init(void)
if (mci_pdev)
break;
}
+
if (!mci_pdev) {
edac_dbg(0, "ie31200 pci_get_device fail\n");
pci_rc = -ENODEV;
- goto fail1;
+ goto fail0;
}
+
pci_rc = ie31200_init_one(mci_pdev, &ie31200_pci_tbl[i]);
if (pci_rc < 0) {
edac_dbg(0, "ie31200 init fail\n");
@@ -645,12 +778,12 @@ static int __init ie31200_init(void)
goto fail1;
}
}
- return 0;
+ return 0;
fail1:
- pci_unregister_driver(&ie31200_driver);
-fail0:
pci_dev_put(mci_pdev);
+fail0:
+ pci_unregister_driver(&ie31200_driver);
return pci_rc;
}
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index fdf3a84fe698..cc50313b3e90 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -127,6 +127,7 @@
static struct res_config {
bool machine_check;
+ /* The number of present memory controllers. */
int num_imc;
u32 imc_base;
u32 cmf_base;
@@ -562,7 +563,7 @@ static struct res_config mtl_p_cfg = {
.err_addr_to_imc_addr = adl_err_addr_to_imc_addr,
};
-static const struct pci_device_id igen6_pci_tbl[] = {
+static struct pci_device_id igen6_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
@@ -785,13 +786,22 @@ static u64 ecclog_read_and_clear(struct igen6_imc *imc)
{
u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
- if (ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)) {
- /* Clear CE/UE bits by writing 1s */
- writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
- return ecclog;
- }
+ /*
+ * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
+ * the invalid value ~0. This will result in a flood of invalid
+ * error reports in polling mode. Skip it.
+ */
+ if (ecclog == ~0)
+ return 0;
- return 0;
+ /* Neither a CE nor a UE. Skip it.*/
+ if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
+ return 0;
+
+ /* Clear CE/UE bits by writing 1s */
+ writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
+
+ return ecclog;
}
static void errsts_clear(struct igen6_imc *imc)
@@ -1192,23 +1202,21 @@ static void igen6_check(struct mem_ctl_info *mci)
irq_work_queue(&ecclog_irq_work);
}
-static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
+/* Check whether the memory controller is absent. */
+static bool igen6_imc_absent(void __iomem *window)
+{
+ return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct igen6_imc *imc;
- void __iomem *window;
int rc;
edac_dbg(2, "\n");
- mchbar += mc * MCHBAR_SIZE;
- window = ioremap(mchbar, MCHBAR_SIZE);
- if (!window) {
- igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
- return -ENODEV;
- }
-
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
@@ -1274,7 +1282,6 @@ fail3:
fail2:
edac_mc_free(mci);
fail:
- iounmap(window);
return rc;
}
@@ -1300,6 +1307,58 @@ static void igen6_unregister_mcis(void)
}
}
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+ void __iomem *window;
+ int lmc, pmc, rc;
+ u64 base;
+
+ for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+ base = mchbar + pmc * MCHBAR_SIZE;
+ window = ioremap(base, MCHBAR_SIZE);
+ if (!window) {
+ igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+ rc = -ENOMEM;
+ goto out_unregister_mcis;
+ }
+
+ if (igen6_imc_absent(window)) {
+ iounmap(window);
+ edac_dbg(2, "Skip absent mc%d\n", pmc);
+ continue;
+ }
+
+ rc = igen6_register_mci(lmc, window, pdev);
+ if (rc)
+ goto out_iounmap;
+
+ /* Done, if all present MCs are detected and registered. */
+ if (++lmc >= res_cfg->num_imc)
+ break;
+ }
+
+ if (!lmc) {
+ igen6_printk(KERN_ERR, "No mc found.\n");
+ return -ENODEV;
+ }
+
+ if (lmc < res_cfg->num_imc) {
+ igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
+ res_cfg->num_imc, lmc);
+ res_cfg->num_imc = lmc;
+ }
+
+ return 0;
+
+out_iounmap:
+ iounmap(window);
+
+out_unregister_mcis:
+ igen6_unregister_mcis();
+
+ return rc;
+}
+
static int igen6_mem_slice_setup(u64 mchbar)
{
struct igen6_imc *imc = &igen6_pvt->imc[0];
@@ -1374,7 +1433,7 @@ static void unregister_err_handler(void)
unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
}
-static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent)
+static void opstate_set(const struct res_config *cfg, const struct pci_device_id *ent)
{
/*
* Quirk: Certain SoCs' error reporting interrupts don't work.
@@ -1396,7 +1455,7 @@ static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent)
static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
u64 mchbar;
- int i, rc;
+ int rc;
edac_dbg(2, "\n");
@@ -1412,11 +1471,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
opstate_set(res_cfg, ent);
- for (i = 0; i < res_cfg->num_imc; i++) {
- rc = igen6_register_mci(i, mchbar, pdev);
- if (rc)
- goto fail2;
- }
+ rc = igen6_register_mcis(pdev, mchbar);
+ if (rc)
+ goto fail;
if (res_cfg->num_imc > 1) {
rc = igen6_mem_slice_setup(mchbar);
diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
new file mode 100644
index 000000000000..38745800ed01
--- /dev/null
+++ b/drivers/edac/loongson_edac.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/acpi.h>
+#include <linux/edac.h>
+#include <linux/init.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include "edac_module.h"
+
+#define ECC_CS_COUNT_REG 0x18
+
+struct loongson_edac_pvt {
+ void __iomem *ecc_base;
+
+ /*
+ * The ECC register in this controller records the number of errors
+ * encountered since reset and cannot be zeroed so in order to be able
+ * to report the error count at each check, this records the previous
+ * register state.
+ */
+ int last_ce_count;
+};
+
+static int read_ecc(struct mem_ctl_info *mci)
+{
+ struct loongson_edac_pvt *pvt = mci->pvt_info;
+ u64 ecc;
+ int cs;
+
+ ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG);
+ /* cs0 -- cs3 */
+ cs = ecc & 0xff;
+ cs += (ecc >> 8) & 0xff;
+ cs += (ecc >> 16) & 0xff;
+ cs += (ecc >> 24) & 0xff;
+
+ return cs;
+}
+
+static void edac_check(struct mem_ctl_info *mci)
+{
+ struct loongson_edac_pvt *pvt = mci->pvt_info;
+ int new, add;
+
+ new = read_ecc(mci);
+ add = new - pvt->last_ce_count;
+ pvt->last_ce_count = new;
+ if (add <= 0)
+ return;
+
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+ 0, 0, 0, 0, 0, -1, "error", "");
+}
+
+static void dimm_config_init(struct mem_ctl_info *mci)
+{
+ struct dimm_info *dimm;
+ u32 size, npages;
+
+ /* size not used */
+ size = -1;
+ npages = MiB_TO_PAGES(size);
+
+ dimm = edac_get_dimm(mci, 0, 0, 0);
+ dimm->nr_pages = npages;
+ snprintf(dimm->label, sizeof(dimm->label),
+ "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0);
+ dimm->grain = 8;
+}
+
+static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase)
+{
+ struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+ pvt->ecc_base = vbase;
+ pvt->last_ce_count = read_ecc(mci);
+}
+
+static int edac_probe(struct platform_device *pdev)
+{
+ struct edac_mc_layer layers[2];
+ struct mem_ctl_info *mci;
+ void __iomem *vbase;
+ int ret;
+
+ vbase = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(vbase))
+ return PTR_ERR(vbase);
+
+ layers[0].type = EDAC_MC_LAYER_CHANNEL;
+ layers[0].size = 1;
+ layers[0].is_virt_csrow = false;
+ layers[1].type = EDAC_MC_LAYER_SLOT;
+ layers[1].size = 1;
+ layers[1].is_virt_csrow = true;
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+ sizeof(struct loongson_edac_pvt));
+ if (mci == NULL)
+ return -ENOMEM;
+
+ mci->mc_idx = edac_device_alloc_index();
+ mci->mtype_cap = MEM_FLAG_RDDR4;
+ mci->edac_ctl_cap = EDAC_FLAG_NONE;
+ mci->edac_cap = EDAC_FLAG_NONE;
+ mci->mod_name = "loongson_edac.c";
+ mci->ctl_name = "loongson_edac_ctl";
+ mci->dev_name = "loongson_edac_dev";
+ mci->ctl_page_to_phys = NULL;
+ mci->pdev = &pdev->dev;
+ mci->error_desc.grain = 8;
+ mci->edac_check = edac_check;
+
+ pvt_init(mci, vbase);
+ dimm_config_init(mci);
+
+ ret = edac_mc_add_mc(mci);
+ if (ret) {
+ edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+ edac_mc_free(mci);
+ return ret;
+ }
+ edac_op_state = EDAC_OPSTATE_POLL;
+
+ return 0;
+}
+
+static void edac_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+ if (mci)
+ edac_mc_free(mci);
+}
+
+static const struct acpi_device_id loongson_edac_acpi_match[] = {
+ {"LOON0010", 0},
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match);
+
+static struct platform_driver loongson_edac_driver = {
+ .probe = edac_probe,
+ .remove = edac_remove,
+ .driver = {
+ .name = "loongson-mc-edac",
+ .acpi_match_table = loongson_edac_acpi_match,
+ },
+};
+module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");
diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
new file mode 100755
index 000000000000..1df8957a8459
--- /dev/null
+++ b/drivers/edac/mem_repair.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic EDAC memory repair driver is designed to control the memory
+ * devices with memory repair features, such as Post Package Repair (PPR),
+ * memory sparing etc. The common sysfs memory repair interface abstracts
+ * the control of various arbitrary memory repair functionalities into a
+ * unified set of functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+enum edac_mem_repair_attributes {
+ MR_TYPE,
+ MR_PERSIST_MODE,
+ MR_SAFE_IN_USE,
+ MR_HPA,
+ MR_MIN_HPA,
+ MR_MAX_HPA,
+ MR_DPA,
+ MR_MIN_DPA,
+ MR_MAX_DPA,
+ MR_NIBBLE_MASK,
+ MR_BANK_GROUP,
+ MR_BANK,
+ MR_RANK,
+ MR_ROW,
+ MR_COLUMN,
+ MR_CHANNEL,
+ MR_SUB_CHANNEL,
+ MEM_DO_REPAIR,
+ MR_MAX_ATTRS
+};
+
+struct edac_mem_repair_dev_attr {
+ struct device_attribute dev_attr;
+ u8 instance;
+};
+
+struct edac_mem_repair_context {
+ char name[EDAC_FEAT_NAME_LEN];
+ struct edac_mem_repair_dev_attr mem_repair_dev_attr[MR_MAX_ATTRS];
+ struct attribute *mem_repair_attrs[MR_MAX_ATTRS + 1];
+ struct attribute_group group;
+};
+
+#define TO_MR_DEV_ATTR(_dev_attr) \
+ container_of(_dev_attr, struct edac_mem_repair_dev_attr, dev_attr)
+
+#define MR_ATTR_SHOW(attrib, cb, type, format) \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ u8 inst = TO_MR_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_mem_repair_ops *ops = \
+ ctx->mem_repair[inst].mem_repair_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \
+ &data); \
+ if (ret) \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+MR_ATTR_SHOW(repair_type, get_repair_type, const char *, "%s\n")
+MR_ATTR_SHOW(persist_mode, get_persist_mode, bool, "%u\n")
+MR_ATTR_SHOW(repair_safe_when_in_use, get_repair_safe_when_in_use, bool, "%u\n")
+MR_ATTR_SHOW(hpa, get_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(min_hpa, get_min_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(max_hpa, get_max_hpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n")
+MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n")
+MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n")
+MR_ATTR_SHOW(bank, get_bank, u32, "%u\n")
+MR_ATTR_SHOW(rank, get_rank, u32, "%u\n")
+MR_ATTR_SHOW(row, get_row, u32, "0x%x\n")
+MR_ATTR_SHOW(column, get_column, u32, "%u\n")
+MR_ATTR_SHOW(channel, get_channel, u32, "%u\n")
+MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n")
+
+#define MR_ATTR_STORE(attrib, cb, type, conv_func) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ u8 inst = TO_MR_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_mem_repair_ops *ops = \
+ ctx->mem_repair[inst].mem_repair_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = conv_func(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, \
+ data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul)
+MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64)
+MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64)
+MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul)
+MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul)
+MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul)
+MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul)
+MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul)
+MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul)
+MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul)
+MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul)
+
+#define MR_DO_OP(attrib, cb) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ u8 inst = TO_MR_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops; \
+ unsigned long data; \
+ int ret; \
+ \
+ ret = kstrtoul(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->mem_repair[inst].private, data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+MR_DO_OP(repair, do_repair)
+
+static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+ struct device *ras_feat_dev = kobj_to_dev(kobj);
+ struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr);
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+ u8 inst = TO_MR_DEV_ATTR(dev_attr)->instance;
+ const struct edac_mem_repair_ops *ops = ctx->mem_repair[inst].mem_repair_ops;
+
+ switch (attr_id) {
+ case MR_TYPE:
+ if (ops->get_repair_type)
+ return a->mode;
+ break;
+ case MR_PERSIST_MODE:
+ if (ops->get_persist_mode) {
+ if (ops->set_persist_mode)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_SAFE_IN_USE:
+ if (ops->get_repair_safe_when_in_use)
+ return a->mode;
+ break;
+ case MR_HPA:
+ if (ops->get_hpa) {
+ if (ops->set_hpa)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_MIN_HPA:
+ if (ops->get_min_hpa)
+ return a->mode;
+ break;
+ case MR_MAX_HPA:
+ if (ops->get_max_hpa)
+ return a->mode;
+ break;
+ case MR_DPA:
+ if (ops->get_dpa) {
+ if (ops->set_dpa)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_MIN_DPA:
+ if (ops->get_min_dpa)
+ return a->mode;
+ break;
+ case MR_MAX_DPA:
+ if (ops->get_max_dpa)
+ return a->mode;
+ break;
+ case MR_NIBBLE_MASK:
+ if (ops->get_nibble_mask) {
+ if (ops->set_nibble_mask)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_BANK_GROUP:
+ if (ops->get_bank_group) {
+ if (ops->set_bank_group)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_BANK:
+ if (ops->get_bank) {
+ if (ops->set_bank)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_RANK:
+ if (ops->get_rank) {
+ if (ops->set_rank)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_ROW:
+ if (ops->get_row) {
+ if (ops->set_row)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_COLUMN:
+ if (ops->get_column) {
+ if (ops->set_column)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_CHANNEL:
+ if (ops->get_channel) {
+ if (ops->set_channel)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_SUB_CHANNEL:
+ if (ops->get_sub_channel) {
+ if (ops->set_sub_channel)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MEM_DO_REPAIR:
+ if (ops->do_repair)
+ return a->mode;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define MR_ATTR_RO(_name, _instance) \
+ ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RO(_name), \
+ .instance = _instance })
+
+#define MR_ATTR_WO(_name, _instance) \
+ ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_WO(_name), \
+ .instance = _instance })
+
+#define MR_ATTR_RW(_name, _instance) \
+ ((struct edac_mem_repair_dev_attr) { .dev_attr = __ATTR_RW(_name), \
+ .instance = _instance })
+
+static int mem_repair_create_desc(struct device *dev,
+ const struct attribute_group **attr_groups,
+ u8 instance)
+{
+ struct edac_mem_repair_context *ctx;
+ struct attribute_group *group;
+ int i;
+ struct edac_mem_repair_dev_attr dev_attr[] = {
+ [MR_TYPE] = MR_ATTR_RO(repair_type, instance),
+ [MR_PERSIST_MODE] = MR_ATTR_RW(persist_mode, instance),
+ [MR_SAFE_IN_USE] = MR_ATTR_RO(repair_safe_when_in_use, instance),
+ [MR_HPA] = MR_ATTR_RW(hpa, instance),
+ [MR_MIN_HPA] = MR_ATTR_RO(min_hpa, instance),
+ [MR_MAX_HPA] = MR_ATTR_RO(max_hpa, instance),
+ [MR_DPA] = MR_ATTR_RW(dpa, instance),
+ [MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance),
+ [MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance),
+ [MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance),
+ [MR_BANK_GROUP] = MR_ATTR_RW(bank_group, instance),
+ [MR_BANK] = MR_ATTR_RW(bank, instance),
+ [MR_RANK] = MR_ATTR_RW(rank, instance),
+ [MR_ROW] = MR_ATTR_RW(row, instance),
+ [MR_COLUMN] = MR_ATTR_RW(column, instance),
+ [MR_CHANNEL] = MR_ATTR_RW(channel, instance),
+ [MR_SUB_CHANNEL] = MR_ATTR_RW(sub_channel, instance),
+ [MEM_DO_REPAIR] = MR_ATTR_WO(repair, instance)
+ };
+
+ ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ for (i = 0; i < MR_MAX_ATTRS; i++) {
+ memcpy(&ctx->mem_repair_dev_attr[i],
+ &dev_attr[i], sizeof(dev_attr[i]));
+ sysfs_attr_init(&ctx->mem_repair_dev_attr[i].dev_attr.attr);
+ ctx->mem_repair_attrs[i] =
+ &ctx->mem_repair_dev_attr[i].dev_attr.attr;
+ }
+
+ sprintf(ctx->name, "%s%d", "mem_repair", instance);
+ group = &ctx->group;
+ group->name = ctx->name;
+ group->attrs = ctx->mem_repair_attrs;
+ group->is_visible = mem_repair_attr_visible;
+ attr_groups[0] = group;
+
+ return 0;
+}
+
+/**
+ * edac_mem_repair_get_desc - get EDAC memory repair descriptors
+ * @dev: client device with memory repair feature
+ * @attr_groups: pointer to attribute group container
+ * @instance: device's memory repair instance number.
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_mem_repair_get_desc(struct device *dev,
+ const struct attribute_group **attr_groups, u8 instance)
+{
+ if (!dev || !attr_groups)
+ return -EINVAL;
+
+ return mem_repair_create_desc(dev, attr_groups, instance);
+}
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index f93f2f2b1cf2..af14c8a3279f 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -372,7 +372,7 @@ static int gen_asym_mask(struct b_cr_slice_channel_hash *p,
struct b_cr_asym_mem_region1_mchbar *as1,
struct b_cr_asym_2way_mem_region_mchbar *as2way)
{
- const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
+ static const int intlv[] = { 0x5, 0xA, 0x3, 0xC };
int mask = 0;
if (as2way->asym_2way_interleave_enable)
@@ -489,7 +489,7 @@ static int dnv_get_registers(void)
*/
static int get_registers(void)
{
- const int intlv[] = { 10, 11, 12, 12 };
+ static const int intlv[] = { 10, 11, 12, 12 };
if (RD_REG(&tolud, b_cr_tolud_pci) ||
RD_REG(&touud_lo, b_cr_touud_lo_pci) ||
diff --git a/drivers/edac/scrub.c b/drivers/edac/scrub.c
new file mode 100755
index 000000000000..f9d02af2fc3a
--- /dev/null
+++ b/drivers/edac/scrub.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The generic EDAC scrub driver controls the memory scrubbers in the
+ * system. The common sysfs scrub interface abstracts the control of
+ * various arbitrary scrubbing functionalities into a unified set of
+ * functions.
+ *
+ * Copyright (c) 2024-2025 HiSilicon Limited.
+ */
+
+#include <linux/edac.h>
+
+enum edac_scrub_attributes {
+ SCRUB_ADDRESS,
+ SCRUB_SIZE,
+ SCRUB_ENABLE_BACKGROUND,
+ SCRUB_MIN_CYCLE_DURATION,
+ SCRUB_MAX_CYCLE_DURATION,
+ SCRUB_CUR_CYCLE_DURATION,
+ SCRUB_MAX_ATTRS
+};
+
+struct edac_scrub_dev_attr {
+ struct device_attribute dev_attr;
+ u8 instance;
+};
+
+struct edac_scrub_context {
+ char name[EDAC_FEAT_NAME_LEN];
+ struct edac_scrub_dev_attr scrub_dev_attr[SCRUB_MAX_ATTRS];
+ struct attribute *scrub_attrs[SCRUB_MAX_ATTRS + 1];
+ struct attribute_group group;
+};
+
+#define TO_SCRUB_DEV_ATTR(_dev_attr) \
+ container_of(_dev_attr, struct edac_scrub_dev_attr, dev_attr)
+
+#define EDAC_SCRUB_ATTR_SHOW(attrib, cb, type, format) \
+static ssize_t attrib##_show(struct device *ras_feat_dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, &data); \
+ if (ret) \
+ return ret; \
+ \
+ return sysfs_emit(buf, format, data); \
+}
+
+EDAC_SCRUB_ATTR_SHOW(addr, read_addr, u64, "0x%llx\n")
+EDAC_SCRUB_ATTR_SHOW(size, read_size, u64, "0x%llx\n")
+EDAC_SCRUB_ATTR_SHOW(enable_background, get_enabled_bg, bool, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(min_cycle_duration, get_min_cycle, u32, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(max_cycle_duration, get_max_cycle, u32, "%u\n")
+EDAC_SCRUB_ATTR_SHOW(current_cycle_duration, get_cycle_duration, u32, "%u\n")
+
+#define EDAC_SCRUB_ATTR_STORE(attrib, cb, type, conv_func) \
+static ssize_t attrib##_store(struct device *ras_feat_dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ u8 inst = TO_SCRUB_DEV_ATTR(attr)->instance; \
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev); \
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops; \
+ type data; \
+ int ret; \
+ \
+ ret = conv_func(buf, 0, &data); \
+ if (ret < 0) \
+ return ret; \
+ \
+ ret = ops->cb(ras_feat_dev->parent, ctx->scrub[inst].private, data); \
+ if (ret) \
+ return ret; \
+ \
+ return len; \
+}
+
+EDAC_SCRUB_ATTR_STORE(addr, write_addr, u64, kstrtou64)
+EDAC_SCRUB_ATTR_STORE(size, write_size, u64, kstrtou64)
+EDAC_SCRUB_ATTR_STORE(enable_background, set_enabled_bg, unsigned long, kstrtoul)
+EDAC_SCRUB_ATTR_STORE(current_cycle_duration, set_cycle_duration, unsigned long, kstrtoul)
+
+static umode_t scrub_attr_visible(struct kobject *kobj, struct attribute *a, int attr_id)
+{
+ struct device *ras_feat_dev = kobj_to_dev(kobj);
+ struct device_attribute *dev_attr = container_of(a, struct device_attribute, attr);
+ u8 inst = TO_SCRUB_DEV_ATTR(dev_attr)->instance;
+ struct edac_dev_feat_ctx *ctx = dev_get_drvdata(ras_feat_dev);
+ const struct edac_scrub_ops *ops = ctx->scrub[inst].scrub_ops;
+
+ switch (attr_id) {
+ case SCRUB_ADDRESS:
+ if (ops->read_addr) {
+ if (ops->write_addr)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_SIZE:
+ if (ops->read_size) {
+ if (ops->write_size)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_ENABLE_BACKGROUND:
+ if (ops->get_enabled_bg) {
+ if (ops->set_enabled_bg)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case SCRUB_MIN_CYCLE_DURATION:
+ if (ops->get_min_cycle)
+ return a->mode;
+ break;
+ case SCRUB_MAX_CYCLE_DURATION:
+ if (ops->get_max_cycle)
+ return a->mode;
+ break;
+ case SCRUB_CUR_CYCLE_DURATION:
+ if (ops->get_cycle_duration) {
+ if (ops->set_cycle_duration)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define EDAC_SCRUB_ATTR_RO(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RO(_name), \
+ .instance = _instance })
+
+#define EDAC_SCRUB_ATTR_WO(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_WO(_name), \
+ .instance = _instance })
+
+#define EDAC_SCRUB_ATTR_RW(_name, _instance) \
+ ((struct edac_scrub_dev_attr) { .dev_attr = __ATTR_RW(_name), \
+ .instance = _instance })
+
+static int scrub_create_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups, u8 instance)
+{
+ struct edac_scrub_context *scrub_ctx;
+ struct attribute_group *group;
+ int i;
+ struct edac_scrub_dev_attr dev_attr[] = {
+ [SCRUB_ADDRESS] = EDAC_SCRUB_ATTR_RW(addr, instance),
+ [SCRUB_SIZE] = EDAC_SCRUB_ATTR_RW(size, instance),
+ [SCRUB_ENABLE_BACKGROUND] = EDAC_SCRUB_ATTR_RW(enable_background, instance),
+ [SCRUB_MIN_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(min_cycle_duration, instance),
+ [SCRUB_MAX_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RO(max_cycle_duration, instance),
+ [SCRUB_CUR_CYCLE_DURATION] = EDAC_SCRUB_ATTR_RW(current_cycle_duration, instance)
+ };
+
+ scrub_ctx = devm_kzalloc(scrub_dev, sizeof(*scrub_ctx), GFP_KERNEL);
+ if (!scrub_ctx)
+ return -ENOMEM;
+
+ group = &scrub_ctx->group;
+ for (i = 0; i < SCRUB_MAX_ATTRS; i++) {
+ memcpy(&scrub_ctx->scrub_dev_attr[i], &dev_attr[i], sizeof(dev_attr[i]));
+ sysfs_attr_init(&scrub_ctx->scrub_dev_attr[i].dev_attr.attr);
+ scrub_ctx->scrub_attrs[i] = &scrub_ctx->scrub_dev_attr[i].dev_attr.attr;
+ }
+ sprintf(scrub_ctx->name, "%s%d", "scrub", instance);
+ group->name = scrub_ctx->name;
+ group->attrs = scrub_ctx->scrub_attrs;
+ group->is_visible = scrub_attr_visible;
+
+ attr_groups[0] = group;
+
+ return 0;
+}
+
+/**
+ * edac_scrub_get_desc - get EDAC scrub descriptors
+ * @scrub_dev: client device, with scrub support
+ * @attr_groups: pointer to attribute group container
+ * @instance: device's scrub instance number.
+ *
+ * Return:
+ * * %0 - Success.
+ * * %-EINVAL - Invalid parameters passed.
+ * * %-ENOMEM - Dynamic memory allocation failed.
+ */
+int edac_scrub_get_desc(struct device *scrub_dev,
+ const struct attribute_group **attr_groups, u8 instance)
+{
+ if (!scrub_dev || !attr_groups)
+ return -EINVAL;
+
+ return scrub_create_desc(scrub_dev, attr_groups, instance);
+}
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 14cfd394b469..29897b21fb8e 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -164,7 +164,7 @@ static struct res_config skx_cfg = {
};
static const struct x86_cpu_id skx_cpuids[] = {
- X86_MATCH_VFM_STEPPINGS(INTEL_SKYLAKE_X, X86_STEPPINGS(0x0, 0xf), &skx_cfg),
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg),
{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
@@ -600,7 +600,7 @@ static int __init skx_init(void)
const struct munit *m;
const char *owner;
int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
- u8 mc = 0, src_id, node_id;
+ u8 mc = 0, src_id;
struct skx_dev *d;
edac_dbg(2, "\n");
@@ -650,15 +650,12 @@ static int __init skx_init(void)
rc = skx_get_src_id(d, 0xf0, &src_id);
if (rc < 0)
goto fail;
- rc = skx_get_node_id(d, &node_id);
- if (rc < 0)
- goto fail;
- edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+
+ edac_dbg(2, "src_id = %d\n", src_id);
for (i = 0; i < SKX_NUM_IMC; i++) {
d->imc[i].mc = mc++;
d->imc[i].lmc = i;
d->imc[i].src_id = src_id;
- d->imc[i].node_id = node_id;
rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
"Skylake Socket", EDAC_MOD_STR,
skx_get_dimm_config, cfg);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 6cf17af7d911..c9ade45c1a99 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -19,6 +19,7 @@
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/mce.h>
+#include <asm/uv/uv.h>
#include "edac_module.h"
#include "skx_common.h"
@@ -115,11 +116,41 @@ EXPORT_SYMBOL_GPL(skx_adxl_get);
void skx_adxl_put(void)
{
+ adxl_component_count = 0;
kfree(adxl_values);
kfree(adxl_msg);
}
EXPORT_SYMBOL_GPL(skx_adxl_put);
+static void skx_init_mc_mapping(struct skx_dev *d)
+{
+ /*
+ * By default, the BIOS presents all memory controllers within each
+ * socket to the EDAC driver. The physical indices are the same as
+ * the logical indices of the memory controllers enumerated by the
+ * EDAC driver.
+ */
+ for (int i = 0; i < NUM_IMC; i++)
+ d->mc_mapping[i] = i;
+}
+
+void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
+{
+ edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, lmc);
+
+ d->mc_mapping[pmc] = lmc;
+}
+EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
+
+static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
+{
+ edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+ pmc, d->mc_mapping[pmc]);
+
+ return d->mc_mapping[pmc];
+}
+
static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
{
struct skx_dev *d;
@@ -187,6 +218,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
return false;
}
+ res->imc = skx_get_mc_mapping(d, res->imc);
+
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
@@ -221,33 +254,51 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
}
EXPORT_SYMBOL_GPL(skx_set_decode);
-int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
+static int skx_get_pkg_id(struct skx_dev *d, u8 *id)
{
- u32 reg;
+ int node;
+ int cpu;
- if (pci_read_config_dword(d->util_all, off, &reg)) {
- skx_printk(KERN_ERR, "Failed to read src id\n");
- return -ENODEV;
+ node = pcibus_to_node(d->util_all->bus);
+ if (numa_valid_node(node)) {
+ for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) {
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->initialized && cpu_to_node(cpu) == node) {
+ *id = c->topo.pkg_id;
+ return 0;
+ }
+ }
}
- *id = GET_BITFIELD(reg, 12, 14);
- return 0;
+ skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n");
+ return -ENODEV;
}
-EXPORT_SYMBOL_GPL(skx_get_src_id);
-int skx_get_node_id(struct skx_dev *d, u8 *id)
+int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
{
u32 reg;
- if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
- skx_printk(KERN_ERR, "Failed to read node id\n");
+ /*
+ * The 3-bit source IDs in PCI configuration space registers are limited
+ * to 8 unique IDs, and each ID is local to a UPI/QPI domain.
+ *
+ * Source IDs cannot be used to map devices to sockets on UV systems
+ * because they can exceed 8 sockets and have multiple UPI/QPI domains
+ * with identical, repeating source IDs.
+ */
+ if (is_uv_system())
+ return skx_get_pkg_id(d, id);
+
+ if (pci_read_config_dword(d->util_all, off, &reg)) {
+ skx_printk(KERN_ERR, "Failed to read src id\n");
return -ENODEV;
}
- *id = GET_BITFIELD(reg, 0, 2);
+ *id = GET_BITFIELD(reg, 12, 14);
return 0;
}
-EXPORT_SYMBOL_GPL(skx_get_node_id);
+EXPORT_SYMBOL_GPL(skx_get_src_id);
static int get_width(u32 mtr)
{
@@ -307,6 +358,8 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &dev_edac_list);
prev = pdev;
+
+ skx_init_mc_mapping(d);
}
if (list)
@@ -507,7 +560,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
- imc->node_id, imc->lmc);
+ imc->src_id, imc->lmc);
if (!mci->ctl_name) {
rc = -ENOMEM;
goto fail0;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 54bba8a62f72..5afd425f3b4f 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -79,6 +79,9 @@
*/
#define MCACOD_EXT_MEM_ERR 0x280
+/* Max RRL register sets per {,sub-,pseudo-}channel. */
+#define NUM_RRL_SET 3
+
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
@@ -93,6 +96,16 @@ struct skx_dev {
struct pci_dev *uracu; /* for i10nm CPU */
struct pci_dev *pcu_cr3; /* for HBM memory detection */
u32 mcroute;
+ /*
+ * Some server BIOS may hide certain memory controllers, and the
+ * EDAC driver skips those hidden memory controllers. However, the
+ * ADXL still decodes memory error address using physical memory
+ * controller indices. The mapping table is used to convert the
+ * physical indices (reported by ADXL) to the logical indices
+ * (used the EDAC driver) of present memory controllers during the
+ * error handling process.
+ */
+ u8 mc_mapping[NUM_IMC];
struct skx_imc {
struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */
@@ -103,13 +116,15 @@ struct skx_dev {
bool hbm_mc;
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
- u8 src_id, node_id;
+ u8 src_id;
struct skx_channel {
struct pci_dev *cdev;
struct pci_dev *edev;
- u32 retry_rd_err_log_s;
- u32 retry_rd_err_log_d;
- u32 retry_rd_err_log_d2;
+ /*
+ * Two groups of RRL control registers per channel to save default RRL
+ * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
+ */
+ u32 rrl_ctl[2][NUM_RRL_SET];
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
@@ -242,9 +257,9 @@ void skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);
+void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
-int skx_get_node_id(struct skx_dev *d, u8 *id);
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c
index 699c7d29d80c..9955396c9a52 100644
--- a/drivers/edac/xgene_edac.c
+++ b/drivers/edac/xgene_edac.c
@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/regmap.h>
+#include <linux/string_choices.h>
#include "edac_module.h"
@@ -1407,7 +1408,7 @@ static void xgene_edac_iob_gic_report(struct edac_device_ctl_info *edac_dev)
dev_err(edac_dev->dev, "Multiple XGIC write size error\n");
info = readl(ctx->dev_csr + XGICTRANSERRREQINFO);
dev_err(edac_dev->dev, "XGIC %s access @ 0x%08X (0x%08X)\n",
- info & REQTYPE_MASK ? "read" : "write", ERRADDR_RD(info),
+ str_read_write(info & REQTYPE_MASK), ERRADDR_RD(info),
info);
writel(reg, ctx->dev_csr + XGICTRANSERRINTSTS);
@@ -1489,19 +1490,19 @@ static void xgene_edac_rb_report(struct edac_device_ctl_info *edac_dev)
if (reg & AGENT_OFFLINE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to offline agent error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & UNIMPL_RBPAGE_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s access to unimplemented page error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & WORD_ALIGNED_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s word aligned access error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (reg & PAGE_ACCESS_ERR_MASK)
dev_err(edac_dev->dev,
"IOB bus %s to page out of range access error\n",
- write ? "write" : "read");
+ str_write_read(write));
if (regmap_write(ctx->edac->rb_map, RBEIR, 0))
return;
if (regmap_write(ctx->edac->rb_map, RBCSR, 0))
@@ -1560,7 +1561,7 @@ rb_skip:
err_addr_lo = readl(ctx->dev_csr + IOBBATRANSERRREQINFOL);
err_addr_hi = readl(ctx->dev_csr + IOBBATRANSERRREQINFOH);
dev_err(edac_dev->dev, "IOB BA %s access at 0x%02X.%08X (0x%08X)\n",
- REQTYPE_F2_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_F2_RD(err_addr_hi)),
ERRADDRH_F2_RD(err_addr_hi), err_addr_lo, err_addr_hi);
if (reg & WRERR_RESP_MASK)
dev_err(edac_dev->dev, "IOB BA requestor ID 0x%08X\n",
@@ -1611,7 +1612,7 @@ chk_iob_axi0:
dev_err(edac_dev->dev,
"%sAXI slave 0 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
- REQTYPE_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS0TRANSERRINTSTS);
@@ -1625,7 +1626,7 @@ chk_iob_axi1:
dev_err(edac_dev->dev,
"%sAXI slave 1 illegal %s access @ 0x%02X.%08X (0x%08X)\n",
reg & IOBAXIS0_M_ILLEGAL_ACCESS_MASK ? "Multiple " : "",
- REQTYPE_RD(err_addr_hi) ? "read" : "write",
+ str_read_write(REQTYPE_RD(err_addr_hi)),
ERRADDRH_RD(err_addr_hi), err_addr_lo, err_addr_hi);
writel(reg, ctx->dev_csr + IOBAXIS1TRANSERRINTSTS);
}