summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig7
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/altera_edac.c58
-rw-r--r--drivers/edac/altera_edac.h25
-rw-r--r--drivers/edac/amd64_edac.c371
-rw-r--r--drivers/edac/amd64_edac.h15
-rw-r--r--drivers/edac/bluefield_edac.c356
-rw-r--r--drivers/edac/edac_mc.c53
-rw-r--r--drivers/edac/edac_mc.h6
-rw-r--r--drivers/edac/edac_mc_sysfs.c92
-rw-r--r--drivers/edac/ghes_edac.c2
-rw-r--r--drivers/edac/i5100_edac.c16
-rw-r--r--drivers/edac/pnd2_edac.c7
13 files changed, 790 insertions, 219 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 200c04ce5b0e..2a2603bfb918 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -510,4 +510,11 @@ config EDAC_ASPEED
First, ECC must be configured in the bootloader. Then, this driver
will expose error counters via the EDAC kernel framework.
+config EDAC_BLUEFIELD
+ tristate "Mellanox BlueField Memory ECC"
+ depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST)
+ help
+ Support for error detection and correction on the
+ Mellanox BlueField SoCs.
+
endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 165ca65e1a3a..d265ff9311f0 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -85,3 +85,4 @@ obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
+obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index c2e693e34d43..fbda4b876afd 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -222,7 +222,6 @@ static unsigned long get_total_mem(void)
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
{ .compatible = "altr,sdram-edac", .data = &c5_data},
{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
- { .compatible = "altr,sdram-edac-s10", .data = &a10_data},
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -1170,6 +1169,24 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
return 0;
}
+/*********************** SDRAM EDAC Device Functions *********************/
+
+#ifdef CONFIG_EDAC_ALTERA_SDRAM
+
+static const struct edac_device_prv_data s10_sdramecc_data = {
+ .setup = altr_check_ecc_deps,
+ .ce_clear_mask = ALTR_S10_ECC_SERRPENA,
+ .ue_clear_mask = ALTR_S10_ECC_DERRPENA,
+ .ecc_enable_mask = ALTR_S10_ECC_EN,
+ .ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST,
+ .ce_set_mask = ALTR_S10_ECC_TSERRA,
+ .ue_set_mask = ALTR_S10_ECC_TDERRA,
+ .set_err_ofst = ALTR_S10_ECC_INTTEST_OFST,
+ .ecc_irq_handler = altr_edac_a10_ecc_irq,
+ .inject_fops = &altr_edac_a10_device_inject_fops,
+};
+#endif /* CONFIG_EDAC_ALTERA_SDRAM */
+
/*********************** OCRAM EDAC Device Functions *********************/
#ifdef CONFIG_EDAC_ALTERA_OCRAM
@@ -1759,6 +1776,9 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = {
#ifdef CONFIG_EDAC_ALTERA_SDMMC
{ .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data },
#endif
+#ifdef CONFIG_EDAC_ALTERA_SDRAM
+ { .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data },
+#endif
{},
};
MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match);
@@ -1866,6 +1886,7 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc)
struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
int irq = irq_desc_get_irq(desc);
+ unsigned long bits;
dberr = (irq == edac->db_irq) ? 1 : 0;
sm_offset = dberr ? A10_SYSMGR_ECC_INTSTAT_DERR_OFST :
@@ -1875,7 +1896,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc)
regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
- for_each_set_bit(bit, (unsigned long *)&irq_status, 32) {
+ bits = irq_status;
+ for_each_set_bit(bit, &bits, 32) {
irq = irq_linear_revmap(edac->domain, dberr * 32 + bit);
if (irq)
generic_handle_irq(irq);
@@ -1889,6 +1911,10 @@ static int validate_parent_available(struct device_node *np)
struct device_node *parent;
int ret = 0;
+ /* SDRAM must be present for Linux (implied parent) */
+ if (of_device_is_compatible(np, "altr,sdram-edac-s10"))
+ return 0;
+
/* Ensure parent device is enabled if parent node exists */
parent = of_parse_phandle(np, "altr,ecc-parent", 0);
if (parent && !of_device_is_available(parent))
@@ -1898,6 +1924,22 @@ static int validate_parent_available(struct device_node *np)
return ret;
}
+static int get_s10_sdram_edac_resource(struct device_node *np,
+ struct resource *res)
+{
+ struct device_node *parent;
+ int ret;
+
+ parent = of_parse_phandle(np, "altr,sdr-syscon", 0);
+ if (!parent)
+ return -ENODEV;
+
+ ret = of_address_to_resource(parent, 0, res);
+ of_node_put(parent);
+
+ return ret;
+}
+
static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
struct device_node *np)
{
@@ -1925,7 +1967,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
return -ENOMEM;
- rc = of_address_to_resource(np, 0, &res);
+ if (of_device_is_compatible(np, "altr,sdram-edac-s10"))
+ rc = get_s10_sdram_edac_resource(np, &res);
+ else
+ rc = of_address_to_resource(np, 0, &res);
+
if (rc < 0) {
edac_printk(KERN_ERR, EDAC_DEVICE,
"%s: no resource address\n", ecc_name);
@@ -2231,13 +2277,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
of_device_is_compatible(child, "altr,socfpga-dma-ecc") ||
of_device_is_compatible(child, "altr,socfpga-usb-ecc") ||
of_device_is_compatible(child, "altr,socfpga-qspi-ecc") ||
+#ifdef CONFIG_EDAC_ALTERA_SDRAM
+ of_device_is_compatible(child, "altr,sdram-edac-s10") ||
+#endif
of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc"))
altr_edac_a10_device_add(edac, child);
#ifdef CONFIG_EDAC_ALTERA_SDRAM
- else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) ||
- (of_device_is_compatible(child, "altr,sdram-edac-s10")))
+ else if (of_device_is_compatible(child, "altr,sdram-edac-a10"))
of_platform_populate(pdev->dev.of_node,
altr_sdram_ctrl_of_match,
NULL, &pdev->dev);
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 55654cc4bcdf..3727e72c8c2e 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -289,6 +289,29 @@ struct altr_sdram_mc_data {
#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000
/************* Stratix10 Defines **************/
+#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00
+#define ALTR_S10_ECC_EN BIT(0)
+
+#define ALTR_S10_ECC_ERRINTEN_OFST 0x10
+#define ALTR_S10_ECC_ERRINTENS_OFST 0x14
+#define ALTR_S10_ECC_ERRINTENR_OFST 0x18
+#define ALTR_S10_ECC_SERRINTEN BIT(0)
+
+#define ALTR_S10_ECC_INTMODE_OFST 0x1C
+#define ALTR_S10_ECC_INTMODE BIT(0)
+
+#define ALTR_S10_ECC_INTSTAT_OFST 0x20
+#define ALTR_S10_ECC_SERRPENA BIT(0)
+#define ALTR_S10_ECC_DERRPENA BIT(8)
+#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \
+ ALTR_S10_ECC_DERRPENA)
+
+#define ALTR_S10_ECC_INTTEST_OFST 0x24
+#define ALTR_S10_ECC_TSERRA BIT(0)
+#define ALTR_S10_ECC_TDERRA BIT(8)
+#define ALTR_S10_ECC_TSERRB BIT(16)
+#define ALTR_S10_ECC_TDERRB BIT(24)
+
#define ALTR_S10_DERR_ADDRA_OFST 0x2C
/* Stratix10 ECC Manager Defines */
@@ -300,7 +323,7 @@ struct altr_sdram_mc_data {
#define S10_SYSMGR_UE_ADDR_OFST 0x224
#define S10_DDR0_IRQ_MASK BIT(16)
-#define S10_DBE_IRQ_MASK 0x3FE
+#define S10_DBE_IRQ_MASK 0x3FFFE
/* Define ECC Block Offsets for peripherals */
#define ECC_BLK_ADDRESS_OFST 0x40
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 873437be86d9..c1d4536ae466 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -788,51 +788,45 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan)
(dclr & BIT(15)) ? "yes" : "no");
}
-/*
- * The Address Mask should be a contiguous set of bits in the non-interleaved
- * case. So to check for CS interleaving, find the most- and least-significant
- * bits of the mask, generate a contiguous bitmask, and compare the two.
- */
-static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs)
+#define CS_EVEN_PRIMARY BIT(0)
+#define CS_ODD_PRIMARY BIT(1)
+#define CS_EVEN_SECONDARY BIT(2)
+#define CS_ODD_SECONDARY BIT(3)
+
+#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY)
+#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY)
+
+static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
{
- u32 mask = pvt->csels[ctrl].csmasks[cs >> 1];
- u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1;
- u32 test_mask = GENMASK(msb, lsb);
+ int cs_mode = 0;
- edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask);
+ if (csrow_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_PRIMARY;
- return mask ^ test_mask;
+ if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
+ cs_mode |= CS_ODD_PRIMARY;
+
+ /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
+ cs_mode |= CS_ODD_SECONDARY;
+
+ return cs_mode;
}
static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
{
- int dimm, size0, size1, cs0, cs1;
+ int dimm, size0, size1, cs0, cs1, cs_mode;
edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
- for (dimm = 0; dimm < 4; dimm++) {
- size0 = 0;
+ for (dimm = 0; dimm < 2; dimm++) {
cs0 = dimm * 2;
-
- if (csrow_enabled(cs0, ctrl, pvt))
- size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0);
-
- size1 = 0;
cs1 = dimm * 2 + 1;
- if (csrow_enabled(cs1, ctrl, pvt)) {
- /*
- * CS interleaving is only supported if both CSes have
- * the same amount of memory. Because they are
- * interleaved, it will look like both CSes have the
- * full amount of memory. Save the size for both as
- * half the amount we found on CS0, if interleaved.
- */
- if (f17_cs_interleaved(pvt, ctrl, cs1))
- size1 = size0 = (size0 >> 1);
- else
- size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1);
- }
+ cs_mode = f17_get_cs_mode(dimm, ctrl, pvt);
+
+ size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0);
+ size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1);
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
cs0, size0,
@@ -942,89 +936,119 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
} else if (pvt->fam == 0x15 && pvt->model == 0x30) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+ } else if (pvt->fam >= 0x17) {
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 4;
+ pvt->csels[umc].m_cnt = 2;
+ }
+
} else {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
}
}
+static void read_umc_base_mask(struct amd64_pvt *pvt)
+{
+ u32 umc_base_reg, umc_base_reg_sec;
+ u32 umc_mask_reg, umc_mask_reg_sec;
+ u32 base_reg, base_reg_sec;
+ u32 mask_reg, mask_reg_sec;
+ u32 *base, *base_sec;
+ u32 *mask, *mask_sec;
+ int cs, umc;
+
+ for_each_umc(umc) {
+ umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR;
+ umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC;
+
+ for_each_chip_select(cs, umc, pvt) {
+ base = &pvt->csels[umc].csbases[cs];
+ base_sec = &pvt->csels[umc].csbases_sec[cs];
+
+ base_reg = umc_base_reg + (cs * 4);
+ base_reg_sec = umc_base_reg_sec + (cs * 4);
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base))
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec))
+ edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base_sec, base_reg_sec);
+ }
+
+ umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
+ umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
+
+ for_each_chip_select_mask(cs, umc, pvt) {
+ mask = &pvt->csels[umc].csmasks[cs];
+ mask_sec = &pvt->csels[umc].csmasks_sec[cs];
+
+ mask_reg = umc_mask_reg + (cs * 4);
+ mask_reg_sec = umc_mask_reg_sec + (cs * 4);
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask))
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec))
+ edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask_sec, mask_reg_sec);
+ }
+ }
+}
+
/*
* Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers
*/
static void read_dct_base_mask(struct amd64_pvt *pvt)
{
- int base_reg0, base_reg1, mask_reg0, mask_reg1, cs;
+ int cs;
prep_chip_selects(pvt);
- if (pvt->umc) {
- base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR;
- base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR;
- mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK;
- mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK;
- } else {
- base_reg0 = DCSB0;
- base_reg1 = DCSB1;
- mask_reg0 = DCSM0;
- mask_reg1 = DCSM1;
- }
+ if (pvt->umc)
+ return read_umc_base_mask(pvt);
for_each_chip_select(cs, 0, pvt) {
- int reg0 = base_reg0 + (cs * 4);
- int reg1 = base_reg1 + (cs * 4);
+ int reg0 = DCSB0 + (cs * 4);
+ int reg1 = DCSB1 + (cs * 4);
u32 *base0 = &pvt->csels[0].csbases[cs];
u32 *base1 = &pvt->csels[1].csbases[cs];
- if (pvt->umc) {
- if (!amd_smn_read(pvt->mc_node_id, reg0, base0))
- edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n",
- cs, *base0, reg0);
+ if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
+ edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
+ cs, *base0, reg0);
- if (!amd_smn_read(pvt->mc_node_id, reg1, base1))
- edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n",
- cs, *base1, reg1);
- } else {
- if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0))
- edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n",
- cs, *base0, reg0);
-
- if (pvt->fam == 0xf)
- continue;
+ if (pvt->fam == 0xf)
+ continue;
- if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
- edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
- cs, *base1, (pvt->fam == 0x10) ? reg1
- : reg0);
- }
+ if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1))
+ edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n",
+ cs, *base1, (pvt->fam == 0x10) ? reg1
+ : reg0);
}
for_each_chip_select_mask(cs, 0, pvt) {
- int reg0 = mask_reg0 + (cs * 4);
- int reg1 = mask_reg1 + (cs * 4);
+ int reg0 = DCSM0 + (cs * 4);
+ int reg1 = DCSM1 + (cs * 4);
u32 *mask0 = &pvt->csels[0].csmasks[cs];
u32 *mask1 = &pvt->csels[1].csmasks[cs];
- if (pvt->umc) {
- if (!amd_smn_read(pvt->mc_node_id, reg0, mask0))
- edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n",
- cs, *mask0, reg0);
-
- if (!amd_smn_read(pvt->mc_node_id, reg1, mask1))
- edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n",
- cs, *mask1, reg1);
- } else {
- if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
- edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
- cs, *mask0, reg0);
+ if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0))
+ edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask0, reg0);
- if (pvt->fam == 0xf)
- continue;
+ if (pvt->fam == 0xf)
+ continue;
- if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
- edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
- cs, *mask1, (pvt->fam == 0x10) ? reg1
- : reg0);
- }
+ if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1))
+ edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n",
+ cs, *mask1, (pvt->fam == 0x10) ? reg1
+ : reg0);
}
}
@@ -1556,18 +1580,58 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
return ddr3_cs_size(cs_mode, false);
}
-static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 base_addr = pvt->csels[umc].csbases[csrow_nr];
+ u32 addr_mask_orig, addr_mask_deinterleaved;
+ u32 msb, weight, num_zero_bits;
+ int dimm, size = 0;
- /* Each mask is used for every two base addresses. */
- u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1];
+ /* No Chip Selects are enabled. */
+ if (!cs_mode)
+ return size;
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1;
+ /* Requested size of an even CS but none are enabled. */
+ if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1))
+ return size;
- edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask);
+ /* Requested size of an odd CS but none are enabled. */
+ if (!(cs_mode & CS_ODD) && (csrow_nr & 1))
+ return size;
+
+ /*
+ * There is one mask per DIMM, and two Chip Selects per DIMM.
+ * CS0 and CS1 -> DIMM0
+ * CS2 and CS3 -> DIMM1
+ */
+ dimm = csrow_nr >> 1;
+
+ /* Asymmetric dual-rank DIMM support. */
+ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
+ addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
+ else
+ addr_mask_orig = pvt->csels[umc].csmasks[dimm];
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight;
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
/* Return size in MBs. */
return size >> 10;
@@ -2232,7 +2296,7 @@ static struct amd64_family_type family_types[] = {
.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
[F17_M10H_CPUS] = {
@@ -2241,7 +2305,7 @@ static struct amd64_family_type family_types[] = {
.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
[F17_M30H_CPUS] = {
@@ -2250,7 +2314,16 @@ static struct amd64_family_type family_types[] = {
.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
.ops = {
.early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_base_addr_to_cs_size,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ }
+ },
+ [F17_M70H_CPUS] = {
+ .ctl_name = "F17h_M70h",
+ .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
+ .ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
};
@@ -2537,13 +2610,6 @@ static void decode_umc_error(int node_id, struct mce *m)
err.channel = find_umc_channel(m);
- if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
- err.err_code = ERR_NORM_ADDR;
- goto log_error;
- }
-
- error_address_to_page_and_offset(sys_addr, &err);
-
if (!(m->status & MCI_STATUS_SYNDV)) {
err.err_code = ERR_SYND;
goto log_error;
@@ -2560,6 +2626,13 @@ static void decode_umc_error(int node_id, struct mce *m)
err.csrow = m->synd & 0x7;
+ if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ err.err_code = ERR_NORM_ADDR;
+ goto log_error;
+ }
+
+ error_address_to_page_and_offset(sys_addr, &err);
+
log_error:
__log_ecc_error(mci, &err, ecc_type);
}
@@ -2809,10 +2882,12 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
int csrow_nr = csrow_nr_orig;
u32 cs_mode, nr_pages;
- if (!pvt->umc)
+ if (!pvt->umc) {
csrow_nr >>= 1;
-
- cs_mode = DBAM_DIMM(csrow_nr, dbam);
+ cs_mode = DBAM_DIMM(csrow_nr, dbam);
+ } else {
+ cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
+ }
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
nr_pages <<= 20 - PAGE_SHIFT;
@@ -2824,6 +2899,49 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
return nr_pages;
}
+static int init_csrows_df(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ enum edac_type edac_mode = EDAC_NONE;
+ enum dev_type dev_type = DEV_UNKNOWN;
+ struct dimm_info *dimm;
+ int empty = 1;
+ u8 umc, cs;
+
+ if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) {
+ edac_mode = EDAC_S16ECD16ED;
+ dev_type = DEV_X16;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) {
+ edac_mode = EDAC_S8ECD8ED;
+ dev_type = DEV_X8;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) {
+ edac_mode = EDAC_S4ECD4ED;
+ dev_type = DEV_X4;
+ } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) {
+ edac_mode = EDAC_SECDED;
+ }
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ empty = 0;
+ dimm = mci->csrows[cs]->channels[umc]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+ dimm->mtype = pvt->dram_type;
+ dimm->edac_mode = edac_mode;
+ dimm->dtype = dev_type;
+ }
+ }
+
+ return empty;
+}
+
/*
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
@@ -2838,15 +2956,16 @@ static int init_csrows(struct mem_ctl_info *mci)
int nr_pages = 0;
u32 val;
- if (!pvt->umc) {
- amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
+ if (pvt->umc)
+ return init_csrows_df(mci);
- pvt->nbcfg = val;
+ amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
- edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
- pvt->mc_node_id, val,
- !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
- }
+ pvt->nbcfg = val;
+
+ edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ pvt->mc_node_id, val,
+ !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
/*
* We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
@@ -2883,13 +3002,7 @@ static int init_csrows(struct mem_ctl_info *mci)
edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
/* Determine DIMM ECC mode: */
- if (pvt->umc) {
- if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED)
- edac_mode = EDAC_S4ECD4ED;
- else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED)
- edac_mode = EDAC_SECDED;
-
- } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
+ if (pvt->nbcfg & NBCFG_ECC_ENABLE) {
edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL)
? EDAC_S4ECD4ED
: EDAC_SECDED;
@@ -3137,12 +3250,15 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
static inline void
f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
{
- u8 i, ecc_en = 1, cpk_en = 1;
+ u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1;
for_each_umc(i) {
if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) {
ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED);
cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP);
+
+ dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6));
+ dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7));
}
}
@@ -3150,8 +3266,15 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
if (ecc_en) {
mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
- if (cpk_en)
+ if (!cpk_en)
+ return;
+
+ if (dev_x4)
mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED;
+ else if (dev_x16)
+ mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED;
+ else
+ mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED;
}
}
@@ -3241,6 +3364,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
fam_type = &family_types[F17_M30H_CPUS];
pvt->ops = &family_types[F17_M30H_CPUS].ops;
break;
+ } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
+ fam_type = &family_types[F17_M70H_CPUS];
+ pvt->ops = &family_types[F17_M70H_CPUS].ops;
+ break;
}
/* fall through */
case 0x18:
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8f66472f7adc..8c3cda81e619 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -96,6 +96,7 @@
/* Hardware limit on ChipSelect rows per MC and processors per system */
#define NUM_CHIPSELECTS 8
#define DRAM_RANGES 8
+#define NUM_CONTROLLERS 8
#define ON true
#define OFF false
@@ -119,6 +120,8 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
/*
* Function 1 - Address Map
@@ -168,7 +171,8 @@
#define DCSM0 0x60
#define DCSM1 0x160
-#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
+#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE)
+#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE)
#define DRAM_CONTROL 0x78
@@ -258,7 +262,9 @@
/* UMC CH register offsets */
#define UMCCH_BASE_ADDR 0x0
+#define UMCCH_BASE_ADDR_SEC 0x10
#define UMCCH_ADDR_MASK 0x20
+#define UMCCH_ADDR_MASK_SEC 0x28
#define UMCCH_ADDR_CFG 0x30
#define UMCCH_DIMM_CFG 0x80
#define UMCCH_UMC_CFG 0x100
@@ -285,6 +291,7 @@ enum amd_families {
F17_CPUS,
F17_M10H_CPUS,
F17_M30H_CPUS,
+ F17_M70H_CPUS,
NUM_FAMILIES,
};
@@ -311,9 +318,11 @@ struct dram_range {
/* A DCT chip selects collection */
struct chip_select {
u32 csbases[NUM_CHIPSELECTS];
+ u32 csbases_sec[NUM_CHIPSELECTS];
u8 b_cnt;
u32 csmasks[NUM_CHIPSELECTS];
+ u32 csmasks_sec[NUM_CHIPSELECTS];
u8 m_cnt;
};
@@ -351,8 +360,8 @@ struct amd64_pvt {
u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
- /* one for each DCT */
- struct chip_select csels[2];
+ /* one for each DCT/UMC */
+ struct chip_select csels[NUM_CONTROLLERS];
/* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */
struct dram_range ranges[DRAM_RANGES];
diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c
new file mode 100644
index 000000000000..e4736eb37bfb
--- /dev/null
+++ b/drivers/edac/bluefield_edac.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Bluefield-specific EDAC driver.
+ *
+ * Copyright (c) 2019 Mellanox Technologies.
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/bitfield.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "edac_module.h"
+
+#define DRIVER_NAME "bluefield-edac"
+
+/*
+ * Mellanox BlueField EMI (External Memory Interface) register definitions.
+ */
+
+#define MLXBF_ECC_CNT 0x340
+#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0)
+#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16)
+
+#define MLXBF_ECC_ERR 0x348
+#define MLXBF_ECC_ERR__SECC BIT(0)
+#define MLXBF_ECC_ERR__DECC BIT(16)
+
+#define MLXBF_ECC_LATCH_SEL 0x354
+#define MLXBF_ECC_LATCH_SEL__START BIT(24)
+
+#define MLXBF_ERR_ADDR_0 0x358
+
+#define MLXBF_ERR_ADDR_1 0x37c
+
+#define MLXBF_SYNDROM 0x35c
+#define MLXBF_SYNDROM__DERR BIT(0)
+#define MLXBF_SYNDROM__SERR BIT(1)
+#define MLXBF_SYNDROM__SYN GENMASK(25, 16)
+
+#define MLXBF_ADD_INFO 0x364
+#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8)
+
+#define MLXBF_EDAC_MAX_DIMM_PER_MC 2
+#define MLXBF_EDAC_ERROR_GRAIN 8
+
+/*
+ * Request MLNX_SIP_GET_DIMM_INFO
+ *
+ * Retrieve information about DIMM on a certain slot.
+ *
+ * Call register usage:
+ * a0: MLNX_SIP_GET_DIMM_INFO
+ * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
+ * a2-7: not used.
+ *
+ * Return status:
+ * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
+ * a1-3: not used.
+ */
+#define MLNX_SIP_GET_DIMM_INFO 0x82000008
+
+/* Format for the SMC response about the memory information */
+#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0)
+#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16)
+#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17)
+#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18)
+#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21)
+#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24)
+
+struct bluefield_edac_priv {
+ int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
+ void __iomem *emi_base;
+ int dimm_per_mc;
+};
+
+static u64 smc_call1(u64 smc_op, u64 smc_arg)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+/*
+ * Gather the ECC information from the External Memory Interface registers
+ * and report it to the edac handler.
+ */
+static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
+ int error_cnt,
+ int is_single_ecc)
+{
+ struct bluefield_edac_priv *priv = mci->pvt_info;
+ u32 dram_additional_info, err_prank, edea0, edea1;
+ u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
+ enum hw_event_mc_err_type ecc_type;
+ u64 ecc_dimm_addr;
+ int ecc_dimm;
+
+ ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
+ HW_EVENT_ERR_UNCORRECTED;
+
+ /*
+ * Tell the External Memory Interface to populate the relevant
+ * registers with information about the last ECC error occurrence.
+ */
+ ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
+ writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL);
+
+ /*
+ * Verify that the ECC reported info in the registers is of the
+ * same type as the one asked to report. If not, just report the
+ * error without the detailed information.
+ */
+ dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM);
+ serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
+ derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
+ syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
+
+ if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
+ edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
+ 0, 0, -1, mci->ctl_name, "");
+ return;
+ }
+
+ dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO);
+ err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
+
+ ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
+
+ edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0);
+ edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1);
+
+ ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
+
+ edac_mc_handle_error(ecc_type, mci, error_cnt,
+ PFN_DOWN(ecc_dimm_addr),
+ offset_in_page(ecc_dimm_addr),
+ syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
+}
+
+static void bluefield_edac_check(struct mem_ctl_info *mci)
+{
+ struct bluefield_edac_priv *priv = mci->pvt_info;
+ u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
+
+ /*
+ * The memory controller might not be initialized by the firmware
+ * when there isn't memory, which may lead to bad register readings.
+ */
+ if (mci->edac_cap == EDAC_FLAG_NONE)
+ return;
+
+ ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT);
+ single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
+ double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
+
+ if (single_error_count) {
+ ecc_error |= MLXBF_ECC_ERR__SECC;
+
+ bluefield_gather_report_ecc(mci, single_error_count, 1);
+ }
+
+ if (double_error_count) {
+ ecc_error |= MLXBF_ECC_ERR__DECC;
+
+ bluefield_gather_report_ecc(mci, double_error_count, 0);
+ }
+
+ /* Write to clear reported errors. */
+ if (ecc_count)
+ writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR);
+}
+
+/* Initialize the DIMMs information for the given memory controller. */
+static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
+{
+ struct bluefield_edac_priv *priv = mci->pvt_info;
+ int mem_ctrl_idx = mci->mc_idx;
+ struct dimm_info *dimm;
+ u64 smc_info, smc_arg;
+ int is_empty = 1, i;
+
+ for (i = 0; i < priv->dimm_per_mc; i++) {
+ dimm = mci->dimms[i];
+
+ smc_arg = mem_ctrl_idx << 16 | i;
+ smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg);
+
+ if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
+ dimm->mtype = MEM_EMPTY;
+ continue;
+ }
+
+ is_empty = 0;
+
+ dimm->edac_mode = EDAC_SECDED;
+
+ if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
+ dimm->mtype = MEM_NVDIMM;
+ else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
+ dimm->mtype = MEM_LRDDR4;
+ else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
+ dimm->mtype = MEM_RDDR4;
+ else
+ dimm->mtype = MEM_DDR4;
+
+ dimm->nr_pages =
+ FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
+ (SZ_1G / PAGE_SIZE);
+ dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
+
+ /* Mem controller for BlueField only supports x4, x8 and x16 */
+ switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
+ case 4:
+ dimm->dtype = DEV_X4;
+ break;
+ case 8:
+ dimm->dtype = DEV_X8;
+ break;
+ case 16:
+ dimm->dtype = DEV_X16;
+ break;
+ default:
+ dimm->dtype = DEV_UNKNOWN;
+ }
+
+ priv->dimm_ranks[i] =
+ FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
+ }
+
+ if (is_empty)
+ mci->edac_cap = EDAC_FLAG_NONE;
+ else
+ mci->edac_cap = EDAC_FLAG_SECDED;
+}
+
+static int bluefield_edac_mc_probe(struct platform_device *pdev)
+{
+ struct bluefield_edac_priv *priv;
+ struct device *dev = &pdev->dev;
+ struct edac_mc_layer layers[1];
+ struct mem_ctl_info *mci;
+ struct resource *emi_res;
+ unsigned int mc_idx, dimm_count;
+ int rc, ret;
+
+ /* Read the MSS (Memory SubSystem) index from ACPI table. */
+ if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
+ dev_warn(dev, "bf_edac: MSS number unknown\n");
+ return -EINVAL;
+ }
+
+ /* Read the DIMMs per MC from ACPI table. */
+ if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
+ dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
+ return -EINVAL;
+ }
+
+ if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
+ dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
+ return -EINVAL;
+ }
+
+ emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!emi_res)
+ return -EINVAL;
+
+ layers[0].type = EDAC_MC_LAYER_SLOT;
+ layers[0].size = dimm_count;
+ layers[0].is_virt_csrow = true;
+
+ mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
+ if (!mci)
+ return -ENOMEM;
+
+ priv = mci->pvt_info;
+
+ priv->dimm_per_mc = dimm_count;
+ priv->emi_base = devm_ioremap_resource(dev, emi_res);
+ if (IS_ERR(priv->emi_base)) {
+ dev_err(dev, "failed to map EMI IO resource\n");
+ ret = PTR_ERR(priv->emi_base);
+ goto err;
+ }
+
+ mci->pdev = dev;
+ mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
+ MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->mod_name = DRIVER_NAME;
+ mci->ctl_name = "BlueField_Memory_Controller";
+ mci->dev_name = dev_name(dev);
+ mci->edac_check = bluefield_edac_check;
+
+ /* Initialize mci with the actual populated DIMM information. */
+ bluefield_edac_init_dimms(mci);
+
+ platform_set_drvdata(pdev, mci);
+
+ /* Register with EDAC core */
+ rc = edac_mc_add_mc(mci);
+ if (rc) {
+ dev_err(dev, "failed to register with EDAC core\n");
+ ret = rc;
+ goto err;
+ }
+
+ /* Only POLL mode supported so far. */
+ edac_op_state = EDAC_OPSTATE_POLL;
+
+ return 0;
+
+err:
+ edac_mc_free(mci);
+
+ return ret;
+
+}
+
+static int bluefield_edac_mc_remove(struct platform_device *pdev)
+{
+ struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+
+ return 0;
+}
+
+static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
+ {"MLNXBF08", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
+
+static struct platform_driver bluefield_edac_mc_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .acpi_match_table = bluefield_mc_acpi_ids,
+ },
+ .probe = bluefield_edac_mc_probe,
+ .remove = bluefield_edac_mc_remove,
+};
+
+module_platform_driver(bluefield_edac_mc_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 64922c8fa7e3..e6fd079783bd 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = {
module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
-unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
- unsigned len)
+unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+ unsigned int len)
{
struct mem_ctl_info *mci = dimm->mci;
int i, n, count = 0;
@@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types);
* At return, the pointer 'p' will be incremented to be used on a next call
* to this function.
*/
-void *edac_align_ptr(void **p, unsigned size, int n_elems)
+void *edac_align_ptr(void **p, unsigned int size, int n_elems)
{
- unsigned align, r;
+ unsigned int align, r;
void *ptr = *p;
*p += size * n_elems;
@@ -275,38 +275,37 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems)
static void _edac_mc_free(struct mem_ctl_info *mci)
{
- int i, chn, row;
struct csrow_info *csr;
- const unsigned int tot_dimms = mci->tot_dimms;
- const unsigned int tot_channels = mci->num_cschannel;
- const unsigned int tot_csrows = mci->nr_csrows;
+ int i, chn, row;
if (mci->dimms) {
- for (i = 0; i < tot_dimms; i++)
+ for (i = 0; i < mci->tot_dimms; i++)
kfree(mci->dimms[i]);
kfree(mci->dimms);
}
+
if (mci->csrows) {
- for (row = 0; row < tot_csrows; row++) {
+ for (row = 0; row < mci->nr_csrows; row++) {
csr = mci->csrows[row];
- if (csr) {
- if (csr->channels) {
- for (chn = 0; chn < tot_channels; chn++)
- kfree(csr->channels[chn]);
- kfree(csr->channels);
- }
- kfree(csr);
+ if (!csr)
+ continue;
+
+ if (csr->channels) {
+ for (chn = 0; chn < mci->num_cschannel; chn++)
+ kfree(csr->channels[chn]);
+ kfree(csr->channels);
}
+ kfree(csr);
}
kfree(mci->csrows);
}
kfree(mci);
}
-struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
- unsigned n_layers,
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+ unsigned int n_layers,
struct edac_mc_layer *layers,
- unsigned sz_pvt)
+ unsigned int sz_pvt)
{
struct mem_ctl_info *mci;
struct edac_mc_layer *layer;
@@ -314,9 +313,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
struct rank_info *chan;
struct dimm_info *dimm;
u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned pos[EDAC_MAX_LAYERS];
- unsigned size, tot_dimms = 1, count = 1;
- unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+ unsigned int pos[EDAC_MAX_LAYERS];
+ unsigned int size, tot_dimms = 1, count = 1;
+ unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
void *pvt, *p, *ptr = NULL;
int i, j, row, chn, n, len, off;
bool per_rank = false;
@@ -1235,9 +1234,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
if (p > e->location)
*(p - 1) = '\0';
- /* Report the error via the trace interface */
- grain_bits = fls_long(e->grain) + 1;
+ /* Sanity-check driver-supplied grain value. */
+ if (WARN_ON_ONCE(!e->grain))
+ e->grain = 1;
+ grain_bits = fls_long(e->grain - 1);
+
+ /* Report the error via the trace interface */
if (IS_ENABLED(CONFIG_RAS))
trace_mc_event(type, e->msg, e->label, e->error_count,
mci->mc_idx, e->top_layer, e->mid_layer,
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 4165e15995ad..02aac5c61d00 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -122,10 +122,10 @@ do { \
* On success, return a pointer to struct mem_ctl_info pointer;
* %NULL otherwise
*/
-struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
- unsigned n_layers,
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+ unsigned int n_layers,
struct edac_mc_layer *layers,
- unsigned sz_pvt);
+ unsigned int sz_pvt);
/**
* edac_get_owner - Return the owner's mod_name of EDAC MC
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 4386ea4b9b5a..32d016f1ecd1 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -131,7 +131,7 @@ static const char * const edac_caps[] = {
struct dev_ch_attribute {
struct device_attribute attr;
- int channel;
+ unsigned int channel;
};
#define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
@@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev,
char *data)
{
struct csrow_info *csrow = to_csrow(dev);
- unsigned chan = to_channel(mattr);
+ unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
/* if field has not been initialized, there is nothing to send */
@@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev,
const char *data, size_t count)
{
struct csrow_info *csrow = to_csrow(dev);
- unsigned chan = to_channel(mattr);
+ unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
size_t copy_count = count;
@@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev,
struct device_attribute *mattr, char *data)
{
struct csrow_info *csrow = to_csrow(dev);
- unsigned chan = to_channel(mattr);
+ unsigned int chan = to_channel(mattr);
struct rank_info *rank = csrow->channels[chan];
return sprintf(data, "%u\n", rank->ce_count);
@@ -278,7 +278,7 @@ static void csrow_attr_release(struct device *dev)
{
struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
- edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
+ edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(csrow);
}
@@ -414,14 +414,16 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
dev_set_name(&csrow->dev, "csrow%d", index);
dev_set_drvdata(&csrow->dev, csrow);
- edac_dbg(0, "creating (virtual) csrow node %s\n",
- dev_name(&csrow->dev));
-
err = device_add(&csrow->dev);
- if (err)
+ if (err) {
+ edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev));
put_device(&csrow->dev);
+ return err;
+ }
- return err;
+ edac_dbg(0, "device %s created\n", dev_name(&csrow->dev));
+
+ return 0;
}
/* Create a CSROW object under specifed edac_mc_device */
@@ -435,12 +437,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
if (!nr_pages_per_csrow(csrow))
continue;
err = edac_create_csrow_object(mci, mci->csrows[i], i);
- if (err < 0) {
- edac_dbg(1,
- "failure: create csrow objects for csrow %d\n",
- i);
+ if (err < 0)
goto error;
- }
}
return 0;
@@ -624,7 +622,7 @@ static void dimm_attr_release(struct device *dev)
{
struct dimm_info *dimm = container_of(dev, struct dimm_info, dev);
- edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev));
+ edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(dimm);
}
@@ -653,12 +651,21 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
pm_runtime_forbid(&mci->dev);
err = device_add(&dimm->dev);
- if (err)
+ if (err) {
+ edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev));
put_device(&dimm->dev);
+ return err;
+ }
- edac_dbg(0, "created rank/dimm device %s\n", dev_name(&dimm->dev));
+ if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+ char location[80];
- return err;
+ edac_dimm_info_location(dimm, location, sizeof(location));
+ edac_dbg(0, "device %s created at location %s\n",
+ dev_name(&dimm->dev), location);
+ }
+
+ return 0;
}
/*
@@ -901,7 +908,7 @@ static void mci_attr_release(struct device *dev)
{
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
- edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev));
+ edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(mci);
}
@@ -933,14 +940,15 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
dev_set_drvdata(&mci->dev, mci);
pm_runtime_forbid(&mci->dev);
- edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
err = device_add(&mci->dev);
if (err < 0) {
edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
put_device(&mci->dev);
- goto out;
+ return err;
}
+ edac_dbg(0, "device %s created\n", dev_name(&mci->dev));
+
/*
* Create the dimm/rank devices
*/
@@ -950,22 +958,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
if (!dimm->nr_pages)
continue;
-#ifdef CONFIG_EDAC_DEBUG
- edac_dbg(1, "creating dimm%d, located at ", i);
- if (edac_debug_level >= 1) {
- int lay;
- for (lay = 0; lay < mci->n_layers; lay++)
- printk(KERN_CONT "%s %d ",
- edac_layer_name[mci->layers[lay].type],
- dimm->location[lay]);
- printk(KERN_CONT "\n");
- }
-#endif
err = edac_create_dimm_object(mci, dimm, i);
- if (err) {
- edac_dbg(1, "failure: create dimm %d obj\n", i);
+ if (err)
goto fail_unregister_dimm;
- }
}
#ifdef CONFIG_EDAC_LEGACY_SYSFS
@@ -987,7 +982,6 @@ fail_unregister_dimm:
}
device_unregister(&mci->dev);
-out:
return err;
}
@@ -1011,14 +1005,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
struct dimm_info *dimm = mci->dimms[i];
if (dimm->nr_pages == 0)
continue;
- edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev));
+ edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
device_unregister(&dimm->dev);
}
}
void edac_unregister_sysfs(struct mem_ctl_info *mci)
{
- edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
+ edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev));
device_unregister(&mci->dev);
}
@@ -1029,7 +1023,7 @@ static void mc_attr_release(struct device *dev)
* parent device, used to create the /sys/devices/mc sysfs node.
* So, there are no attributes on it.
*/
- edac_dbg(1, "Releasing device %s\n", dev_name(dev));
+ edac_dbg(1, "device %s released\n", dev_name(dev));
kfree(dev);
}
@@ -1044,10 +1038,8 @@ int __init edac_mc_sysfs_init(void)
int err;
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
- if (!mci_pdev) {
- err = -ENOMEM;
- goto out;
- }
+ if (!mci_pdev)
+ return -ENOMEM;
mci_pdev->bus = edac_get_sysfs_subsys();
mci_pdev->type = &mc_attr_type;
@@ -1055,17 +1047,15 @@ int __init edac_mc_sysfs_init(void)
dev_set_name(mci_pdev, "mc");
err = device_add(mci_pdev);
- if (err < 0)
- goto out_put_device;
+ if (err < 0) {
+ edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
+ put_device(mci_pdev);
+ return err;
+ }
edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
return 0;
-
- out_put_device:
- put_device(mci_pdev);
- out:
- return err;
}
void edac_mc_sysfs_exit(void)
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 7f19f1c672c3..d413a0bdc9ad 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -68,7 +68,7 @@ struct memdev_dmi_entry {
struct ghes_edac_dimm_fill {
struct mem_ctl_info *mci;
- unsigned count;
+ unsigned int count;
};
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index b506eef6b146..251f2b692785 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -417,7 +417,8 @@ static const char *i5100_err_msg(unsigned err)
}
/* convert csrow index into a rank (per channel -- 0..5) */
-static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci,
+ unsigned int csrow)
{
const struct i5100_priv *priv = mci->pvt_info;
@@ -425,7 +426,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
}
/* convert csrow index into a channel (0..1) */
-static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci,
+ unsigned int csrow)
{
const struct i5100_priv *priv = mci->pvt_info;
@@ -653,11 +655,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor,
return ret;
}
-static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow)
+static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow)
{
struct i5100_priv *priv = mci->pvt_info;
- const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow);
- const unsigned chan = i5100_csrow_to_chan(mci, csrow);
+ const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow);
+ const unsigned int chan = i5100_csrow_to_chan(mci, csrow);
unsigned addr_lines;
/* dimm present? */
@@ -852,8 +854,8 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
for (i = 0; i < mci->tot_dimms; i++) {
struct dimm_info *dimm;
const unsigned long npages = i5100_npages(mci, i);
- const unsigned chan = i5100_csrow_to_chan(mci, i);
- const unsigned rank = i5100_csrow_to_rank(mci, i);
+ const unsigned int chan = i5100_csrow_to_chan(mci, i);
+ const unsigned int rank = i5100_csrow_to_rank(mci, i);
if (!npages)
continue;
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index ca25f8fe57ef..1ad538baaa4a 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -260,11 +260,14 @@ static u64 get_sideband_reg_base_addr(void)
}
}
+#define DNV_MCHBAR_SIZE 0x8000
+#define DNV_SB_PORT_SIZE 0x10000
static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name)
{
struct pci_dev *pdev;
char *base;
u64 addr;
+ unsigned long size;
if (op == 4) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL);
@@ -279,15 +282,17 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na
addr = get_mem_ctrl_hub_base_addr();
if (!addr)
return -ENODEV;
+ size = DNV_MCHBAR_SIZE;
} else {
/* MMIO via sideband register base address */
addr = get_sideband_reg_base_addr();
if (!addr)
return -ENODEV;
addr += (port << 16);
+ size = DNV_SB_PORT_SIZE;
}
- base = ioremap((resource_size_t)addr, 0x10000);
+ base = ioremap((resource_size_t)addr, size);
if (!base)
return -ENODEV;