Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig          |   7
-rw-r--r-- | drivers/edac/Makefile         |   1
-rw-r--r-- | drivers/edac/altera_edac.c    |  58
-rw-r--r-- | drivers/edac/altera_edac.h    |  25
-rw-r--r-- | drivers/edac/amd64_edac.c     | 371
-rw-r--r-- | drivers/edac/amd64_edac.h     |  15
-rw-r--r-- | drivers/edac/bluefield_edac.c | 356
-rw-r--r-- | drivers/edac/edac_mc.c        |  53
-rw-r--r-- | drivers/edac/edac_mc.h        |   6
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c  |  92
-rw-r--r-- | drivers/edac/ghes_edac.c      |   2
-rw-r--r-- | drivers/edac/i5100_edac.c     |  16
-rw-r--r-- | drivers/edac/pnd2_edac.c      |   7
13 files changed, 790 insertions, 219 deletions
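
Note on the edac_mc.c hunk below: the trace interface reports the error grain as a power of two, 2^grain_bits, and the patch changes the conversion from fls_long(e->grain) + 1 to fls_long(e->grain - 1) (after sanity-checking that the grain is non-zero). A standalone illustration of the difference — plain C with a minimal stand-in for the kernel's fls_long(), not kernel code:

#include <stdio.h>

/* Minimal stand-in for the kernel's fls_long(): 1-based index of the highest set bit, 0 for 0. */
static int fls_long(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long grains[] = { 1, 4, 8, 9, 64 };

	for (unsigned int i = 0; i < sizeof(grains) / sizeof(grains[0]); i++) {
		unsigned long g = grains[i];
		int old_bits = fls_long(g) + 1;	/* before this patch */
		int new_bits = fls_long(g - 1);	/* after this patch  */

		printf("grain %3lu: old 2^%d = %lu, new 2^%d = %lu\n",
		       g, old_bits, 1UL << old_bits, new_bits, 1UL << new_bits);
	}
	return 0;
}

For a power-of-two grain the old formula overstated the reported value by a factor of four (grain 8 was traced as 2^5 = 32); the new one reports it exactly and rounds up for non-power-of-two grains.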
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 200c04ce5b0e..2a2603bfb918 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -510,4 +510,11 @@ config EDAC_ASPEED First, ECC must be configured in the bootloader. Then, this driver will expose error counters via the EDAC kernel framework. +config EDAC_BLUEFIELD + tristate "Mellanox BlueField Memory ECC" + depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST) + help + Support for error detection and correction on the + Mellanox BlueField SoCs. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 165ca65e1a3a..d265ff9311f0 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -85,3 +85,4 @@ obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o obj-$(CONFIG_EDAC_TI) += ti_edac.o obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o +obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c2e693e34d43..fbda4b876afd 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -222,7 +222,6 @@ static unsigned long get_total_mem(void) static const struct of_device_id altr_sdram_ctrl_of_match[] = { { .compatible = "altr,sdram-edac", .data = &c5_data}, { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, - { .compatible = "altr,sdram-edac-s10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -1170,6 +1169,24 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) return 0; } +/*********************** SDRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDRAM + +static const struct edac_device_prv_data s10_sdramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_S10_ECC_SERRPENA, + .ue_clear_mask = ALTR_S10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_S10_ECC_EN, + .ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST, + .ce_set_mask = ALTR_S10_ECC_TSERRA, + .ue_set_mask = ALTR_S10_ECC_TDERRA, + .set_err_ofst = ALTR_S10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + /*********************** OCRAM EDAC Device Functions *********************/ #ifdef CONFIG_EDAC_ALTERA_OCRAM @@ -1759,6 +1776,9 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_SDMMC { .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data }, #endif +#ifdef CONFIG_EDAC_ALTERA_SDRAM + { .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data }, +#endif {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); @@ -1866,6 +1886,7 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); int irq = irq_desc_get_irq(desc); + unsigned long bits; dberr = (irq == edac->db_irq) ? 1 : 0; sm_offset = dberr ? 
A10_SYSMGR_ECC_INTSTAT_DERR_OFST : @@ -1875,7 +1896,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { + bits = irq_status; + for_each_set_bit(bit, &bits, 32) { irq = irq_linear_revmap(edac->domain, dberr * 32 + bit); if (irq) generic_handle_irq(irq); @@ -1889,6 +1911,10 @@ static int validate_parent_available(struct device_node *np) struct device_node *parent; int ret = 0; + /* SDRAM must be present for Linux (implied parent) */ + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + return 0; + /* Ensure parent device is enabled if parent node exists */ parent = of_parse_phandle(np, "altr,ecc-parent", 0); if (parent && !of_device_is_available(parent)) @@ -1898,6 +1924,22 @@ static int validate_parent_available(struct device_node *np) return ret; } +static int get_s10_sdram_edac_resource(struct device_node *np, + struct resource *res) +{ + struct device_node *parent; + int ret; + + parent = of_parse_phandle(np, "altr,sdr-syscon", 0); + if (!parent) + return -ENODEV; + + ret = of_address_to_resource(parent, 0, res); + of_node_put(parent); + + return ret; +} + static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, struct device_node *np) { @@ -1925,7 +1967,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) return -ENOMEM; - rc = of_address_to_resource(np, 0, &res); + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + rc = get_s10_sdram_edac_resource(np, &res); + else + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { edac_printk(KERN_ERR, EDAC_DEVICE, "%s: no resource address\n", ecc_name); @@ -2231,13 +2277,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev) of_device_is_compatible(child, "altr,socfpga-dma-ecc") || of_device_is_compatible(child, "altr,socfpga-usb-ecc") || of_device_is_compatible(child, "altr,socfpga-qspi-ecc") || +#ifdef CONFIG_EDAC_ALTERA_SDRAM + of_device_is_compatible(child, "altr,sdram-edac-s10") || +#endif of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc")) altr_edac_a10_device_add(edac, child); #ifdef CONFIG_EDAC_ALTERA_SDRAM - else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || - (of_device_is_compatible(child, "altr,sdram-edac-s10"))) + else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 55654cc4bcdf..3727e72c8c2e 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,29 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00 +#define ALTR_S10_ECC_EN BIT(0) + +#define ALTR_S10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_S10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_S10_ECC_ERRINTENR_OFST 0x18 +#define ALTR_S10_ECC_SERRINTEN BIT(0) + +#define ALTR_S10_ECC_INTMODE_OFST 0x1C +#define ALTR_S10_ECC_INTMODE BIT(0) + +#define ALTR_S10_ECC_INTSTAT_OFST 0x20 +#define ALTR_S10_ECC_SERRPENA BIT(0) +#define ALTR_S10_ECC_DERRPENA BIT(8) +#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \ + ALTR_S10_ECC_DERRPENA) + +#define ALTR_S10_ECC_INTTEST_OFST 0x24 +#define ALTR_S10_ECC_TSERRA BIT(0) +#define ALTR_S10_ECC_TDERRA BIT(8) +#define ALTR_S10_ECC_TSERRB BIT(16) +#define 
ALTR_S10_ECC_TDERRB BIT(24) + #define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ @@ -300,7 +323,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) -#define S10_DBE_IRQ_MASK 0x3FE +#define S10_DBE_IRQ_MASK 0x3FFFE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 873437be86d9..c1d4536ae466 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -788,51 +788,45 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } -/* - * The Address Mask should be a contiguous set of bits in the non-interleaved - * case. So to check for CS interleaving, find the most- and least-significant - * bits of the mask, generate a contiguous bitmask, and compare the two. - */ -static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +#define CS_EVEN_PRIMARY BIT(0) +#define CS_ODD_PRIMARY BIT(1) +#define CS_EVEN_SECONDARY BIT(2) +#define CS_ODD_SECONDARY BIT(3) + +#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) +#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) + +static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { - u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; - u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; - u32 test_mask = GENMASK(msb, lsb); + int cs_mode = 0; - edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + if (csrow_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_PRIMARY; - return mask ^ test_mask; + if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_PRIMARY; + + /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; + + return cs_mode; } static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { - int dimm, size0, size1, cs0, cs1; + int dimm, size0, size1, cs0, cs1, cs_mode; edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); - for (dimm = 0; dimm < 4; dimm++) { - size0 = 0; + for (dimm = 0; dimm < 2; dimm++) { cs0 = dimm * 2; - - if (csrow_enabled(cs0, ctrl, pvt)) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0); - - size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) { - /* - * CS interleaving is only supported if both CSes have - * the same amount of memory. Because they are - * interleaved, it will look like both CSes have the - * full amount of memory. Save the size for both as - * half the amount we found on CS0, if interleaved. 
- */ - if (f17_cs_interleaved(pvt, ctrl, cs1)) - size1 = size0 = (size0 >> 1); - else - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); - } + cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); + + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -942,89 +936,119 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; + } else if (pvt->fam >= 0x17) { + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = 2; + } + } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void read_umc_base_mask(struct amd64_pvt *pvt) +{ + u32 umc_base_reg, umc_base_reg_sec; + u32 umc_mask_reg, umc_mask_reg_sec; + u32 base_reg, base_reg_sec; + u32 mask_reg, mask_reg_sec; + u32 *base, *base_sec; + u32 *mask, *mask_sec; + int cs, umc; + + for_each_umc(umc) { + umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; + + for_each_chip_select(cs, umc, pvt) { + base = &pvt->csels[umc].csbases[cs]; + base_sec = &pvt->csels[umc].csbases_sec[cs]; + + base_reg = umc_base_reg + (cs * 4); + base_reg_sec = umc_base_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) + edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base_sec, base_reg_sec); + } + + umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + + for_each_chip_select_mask(cs, umc, pvt) { + mask = &pvt->csels[umc].csmasks[cs]; + mask_sec = &pvt->csels[umc].csmasks_sec[cs]; + + mask_reg = umc_mask_reg + (cs * 4); + mask_reg_sec = umc_mask_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) + edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask_sec, mask_reg_sec); + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; + int cs; prep_chip_selects(pvt); - if (pvt->umc) { - base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; - base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; - mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; - mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; - } else { - base_reg0 = DCSB0; - base_reg1 = DCSB1; - mask_reg0 = DCSM0; - mask_reg1 = DCSM1; - } + if (pvt->umc) + return read_umc_base_mask(pvt); for_each_chip_select(cs, 0, pvt) { - int reg0 = base_reg0 + (cs * 4); - int reg1 = base_reg1 + (cs * 4); + int reg0 = DCSB0 + (cs * 4); + int reg1 = DCSB1 + (cs * 4); u32 *base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", - cs, *base0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (!amd_smn_read(pvt->mc_node_id, reg1, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", - 
cs, *base1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); - - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = mask_reg0 + (cs * 4); - int reg1 = mask_reg1 + (cs * 4); + int reg0 = DCSM0 + (cs * 4); + int reg1 = DCSM1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", - cs, *mask0, reg0); - - if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", - cs, *mask1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } @@ -1556,18 +1580,58 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } -static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc, +static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 base_addr = pvt->csels[umc].csbases[csrow_nr]; + u32 addr_mask_orig, addr_mask_deinterleaved; + u32 msb, weight, num_zero_bits; + int dimm, size = 0; - /* Each mask is used for every two base addresses. */ - u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1]; + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; - /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1; + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; - edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask); + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; + + /* + * There is one mask per DIMM, and two Chip Selects per DIMM. + * CS0 and CS1 -> DIMM0 + * CS2 and CS3 -> DIMM1 + */ + dimm = csrow_nr >> 1; + + /* Asymmetric dual-rank DIMM support. */ + if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) + addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + else + addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. + */ + msb = fls(addr_mask_orig) - 1; + weight = hweight_long(addr_mask_orig); + num_zero_bits = msb - weight; + + /* Take the number of zero bits off from the top of the mask. 
*/ + addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = (addr_mask_deinterleaved >> 2) + 1; /* Return size in MBs. */ return size >> 10; @@ -2232,7 +2296,7 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M10H_CPUS] = { @@ -2241,7 +2305,7 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M30H_CPUS] = { @@ -2250,7 +2314,16 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, + [F17_M70H_CPUS] = { + .ctl_name = "F17h_M70h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, }; @@ -2537,13 +2610,6 @@ static void decode_umc_error(int node_id, struct mce *m) err.channel = find_umc_channel(m); - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { - err.err_code = ERR_NORM_ADDR; - goto log_error; - } - - error_address_to_page_and_offset(sys_addr, &err); - if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -2560,6 +2626,13 @@ static void decode_umc_error(int node_id, struct mce *m) err.csrow = m->synd & 0x7; + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } + + error_address_to_page_and_offset(sys_addr, &err); + log_error: __log_ecc_error(mci, &err, ecc_type); } @@ -2809,10 +2882,12 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) int csrow_nr = csrow_nr_orig; u32 cs_mode, nr_pages; - if (!pvt->umc) + if (!pvt->umc) { csrow_nr >>= 1; - - cs_mode = DBAM_DIMM(csrow_nr, dbam); + cs_mode = DBAM_DIMM(csrow_nr, dbam); + } else { + cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); + } nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; @@ -2824,6 +2899,49 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) return nr_pages; } +static int init_csrows_df(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + enum edac_type edac_mode = EDAC_NONE; + enum dev_type dev_type = DEV_UNKNOWN; + struct dimm_info *dimm; + int empty = 1; + u8 umc, cs; + + if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { + edac_mode = EDAC_S16ECD16ED; + dev_type = DEV_X16; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) { + edac_mode = EDAC_S8ECD8ED; + dev_type = DEV_X8; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) { + edac_mode = EDAC_S4ECD4ED; + dev_type = DEV_X4; + } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) { + edac_mode = EDAC_SECDED; + } + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + 
empty = 0; + dimm = mci->csrows[cs]->channels[umc]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); + dimm->mtype = pvt->dram_type; + dimm->edac_mode = edac_mode; + dimm->dtype = dev_type; + } + } + + return empty; +} + /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. @@ -2838,15 +2956,16 @@ static int init_csrows(struct mem_ctl_info *mci) int nr_pages = 0; u32 val; - if (!pvt->umc) { - amd64_read_pci_cfg(pvt->F3, NBCFG, &val); + if (pvt->umc) + return init_csrows_df(mci); - pvt->nbcfg = val; + amd64_read_pci_cfg(pvt->F3, NBCFG, &val); - edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", - pvt->mc_node_id, val, - !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); - } + pvt->nbcfg = val; + + edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + pvt->mc_node_id, val, + !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); /* * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. @@ -2883,13 +3002,7 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* Determine DIMM ECC mode: */ - if (pvt->umc) { - if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) - edac_mode = EDAC_S4ECD4ED; - else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) - edac_mode = EDAC_SECDED; - - } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) { + if (pvt->nbcfg & NBCFG_ECC_ENABLE) { edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ? EDAC_S4ECD4ED : EDAC_SECDED; @@ -3137,12 +3250,15 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) static inline void f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { - u8 i, ecc_en = 1, cpk_en = 1; + u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); + + dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6)); + dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7)); } } @@ -3150,8 +3266,15 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) if (ecc_en) { mci->edac_ctl_cap |= EDAC_FLAG_SECDED; - if (cpk_en) + if (!cpk_en) + return; + + if (dev_x4) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; + else if (dev_x16) + mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED; + else + mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED; } } @@ -3241,6 +3364,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M30H_CPUS]; pvt->ops = &family_types[F17_M30H_CPUS].ops; break; + } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + break; } /* fall through */ case 0x18: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8f66472f7adc..8c3cda81e619 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,6 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 +#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -119,6 +120,8 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 /* * Function 1 - Address Map @@ -168,7 +171,8 
@@ #define DCSM0 0x60 #define DCSM1 0x160 -#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE) #define DRAM_CONTROL 0x78 @@ -258,7 +262,9 @@ /* UMC CH register offsets */ #define UMCCH_BASE_ADDR 0x0 +#define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 +#define UMCCH_ADDR_MASK_SEC 0x28 #define UMCCH_ADDR_CFG 0x30 #define UMCCH_DIMM_CFG 0x80 #define UMCCH_UMC_CFG 0x100 @@ -285,6 +291,7 @@ enum amd_families { F17_CPUS, F17_M10H_CPUS, F17_M30H_CPUS, + F17_M70H_CPUS, NUM_FAMILIES, }; @@ -311,9 +318,11 @@ struct dram_range { /* A DCT chip selects collection */ struct chip_select { u32 csbases[NUM_CHIPSELECTS]; + u32 csbases_sec[NUM_CHIPSELECTS]; u8 b_cnt; u32 csmasks[NUM_CHIPSELECTS]; + u32 csmasks_sec[NUM_CHIPSELECTS]; u8 m_cnt; }; @@ -351,8 +360,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select csels[NUM_CONTROLLERS]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c new file mode 100644 index 000000000000..e4736eb37bfb --- /dev/null +++ b/drivers/edac/bluefield_edac.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Bluefield-specific EDAC driver. + * + * Copyright (c) 2019 Mellanox Technologies. + */ + +#include <linux/acpi.h> +#include <linux/arm-smccc.h> +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define DRIVER_NAME "bluefield-edac" + +/* + * Mellanox BlueField EMI (External Memory Interface) register definitions. + */ + +#define MLXBF_ECC_CNT 0x340 +#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) +#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) + +#define MLXBF_ECC_ERR 0x348 +#define MLXBF_ECC_ERR__SECC BIT(0) +#define MLXBF_ECC_ERR__DECC BIT(16) + +#define MLXBF_ECC_LATCH_SEL 0x354 +#define MLXBF_ECC_LATCH_SEL__START BIT(24) + +#define MLXBF_ERR_ADDR_0 0x358 + +#define MLXBF_ERR_ADDR_1 0x37c + +#define MLXBF_SYNDROM 0x35c +#define MLXBF_SYNDROM__DERR BIT(0) +#define MLXBF_SYNDROM__SERR BIT(1) +#define MLXBF_SYNDROM__SYN GENMASK(25, 16) + +#define MLXBF_ADD_INFO 0x364 +#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) + +#define MLXBF_EDAC_MAX_DIMM_PER_MC 2 +#define MLXBF_EDAC_ERROR_GRAIN 8 + +/* + * Request MLNX_SIP_GET_DIMM_INFO + * + * Retrieve information about DIMM on a certain slot. + * + * Call register usage: + * a0: MLNX_SIP_GET_DIMM_INFO + * a1: (Memory controller index) << 16 | (Dimm index in memory controller) + * a2-7: not used. + * + * Return status: + * a0: MLXBF_DIMM_INFO defined below describing the DIMM. + * a1-3: not used. 
+ */ +#define MLNX_SIP_GET_DIMM_INFO 0x82000008 + +/* Format for the SMC response about the memory information */ +#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) +#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) +#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) +#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) +#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) +#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) + +struct bluefield_edac_priv { + int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; + void __iomem *emi_base; + int dimm_per_mc; +}; + +static u64 smc_call1(u64 smc_op, u64 smc_arg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +/* + * Gather the ECC information from the External Memory Interface registers + * and report it to the edac handler. + */ +static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, + int error_cnt, + int is_single_ecc) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 dram_additional_info, err_prank, edea0, edea1; + u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; + enum hw_event_mc_err_type ecc_type; + u64 ecc_dimm_addr; + int ecc_dimm; + + ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : + HW_EVENT_ERR_UNCORRECTED; + + /* + * Tell the External Memory Interface to populate the relevant + * registers with information about the last ECC error occurrence. + */ + ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; + writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); + + /* + * Verify that the ECC reported info in the registers is of the + * same type as the one asked to report. If not, just report the + * error without the detailed information. + */ + dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); + derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); + syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); + + if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { + edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, + 0, 0, -1, mci->ctl_name, ""); + return; + } + + dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); + + ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; + + edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); + edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); + + ecc_dimm_addr = ((u64)edea1 << 32) | edea0; + + edac_mc_handle_error(ecc_type, mci, error_cnt, + PFN_DOWN(ecc_dimm_addr), + offset_in_page(ecc_dimm_addr), + syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); +} + +static void bluefield_edac_check(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + + /* + * The memory controller might not be initialized by the firmware + * when there isn't memory, which may lead to bad register readings. + */ + if (mci->edac_cap == EDAC_FLAG_NONE) + return; + + ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); + double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); + + if (single_error_count) { + ecc_error |= MLXBF_ECC_ERR__SECC; + + bluefield_gather_report_ecc(mci, single_error_count, 1); + } + + if (double_error_count) { + ecc_error |= MLXBF_ECC_ERR__DECC; + + bluefield_gather_report_ecc(mci, double_error_count, 0); + } + + /* Write to clear reported errors. 
*/ + if (ecc_count) + writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); +} + +/* Initialize the DIMMs information for the given memory controller. */ +static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + int mem_ctrl_idx = mci->mc_idx; + struct dimm_info *dimm; + u64 smc_info, smc_arg; + int is_empty = 1, i; + + for (i = 0; i < priv->dimm_per_mc; i++) { + dimm = mci->dimms[i]; + + smc_arg = mem_ctrl_idx << 16 | i; + smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); + + if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { + dimm->mtype = MEM_EMPTY; + continue; + } + + is_empty = 0; + + dimm->edac_mode = EDAC_SECDED; + + if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) + dimm->mtype = MEM_NVDIMM; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) + dimm->mtype = MEM_LRDDR4; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) + dimm->mtype = MEM_RDDR4; + else + dimm->mtype = MEM_DDR4; + + dimm->nr_pages = + FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * + (SZ_1G / PAGE_SIZE); + dimm->grain = MLXBF_EDAC_ERROR_GRAIN; + + /* Mem controller for BlueField only supports x4, x8 and x16 */ + switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { + case 4: + dimm->dtype = DEV_X4; + break; + case 8: + dimm->dtype = DEV_X8; + break; + case 16: + dimm->dtype = DEV_X16; + break; + default: + dimm->dtype = DEV_UNKNOWN; + } + + priv->dimm_ranks[i] = + FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); + } + + if (is_empty) + mci->edac_cap = EDAC_FLAG_NONE; + else + mci->edac_cap = EDAC_FLAG_SECDED; +} + +static int bluefield_edac_mc_probe(struct platform_device *pdev) +{ + struct bluefield_edac_priv *priv; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct resource *emi_res; + unsigned int mc_idx, dimm_count; + int rc, ret; + + /* Read the MSS (Memory SubSystem) index from ACPI table. */ + if (device_property_read_u32(dev, "mss_number", &mc_idx)) { + dev_warn(dev, "bf_edac: MSS number unknown\n"); + return -EINVAL; + } + + /* Read the DIMMs per MC from ACPI table. */ + if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { + dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); + return -EINVAL; + } + + if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { + dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); + return -EINVAL; + } + + emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!emi_res) + return -EINVAL; + + layers[0].type = EDAC_MC_LAYER_SLOT; + layers[0].size = dimm_count; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); + if (!mci) + return -ENOMEM; + + priv = mci->pvt_info; + + priv->dimm_per_mc = dimm_count; + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + + mci->pdev = dev; + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->mod_name = DRIVER_NAME; + mci->ctl_name = "BlueField_Memory_Controller"; + mci->dev_name = dev_name(dev); + mci->edac_check = bluefield_edac_check; + + /* Initialize mci with the actual populated DIMM information. 
*/ + bluefield_edac_init_dimms(mci); + + platform_set_drvdata(pdev, mci); + + /* Register with EDAC core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(dev, "failed to register with EDAC core\n"); + ret = rc; + goto err; + } + + /* Only POLL mode supported so far. */ + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; + +err: + edac_mc_free(mci); + + return ret; + +} + +static int bluefield_edac_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + return 0; +} + +static const struct acpi_device_id bluefield_mc_acpi_ids[] = { + {"MLNXBF08", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); + +static struct platform_driver bluefield_edac_mc_driver = { + .driver = { + .name = DRIVER_NAME, + .acpi_match_table = bluefield_mc_acpi_ids, + }, + .probe = bluefield_edac_mc_probe, + .remove = bluefield_edac_mc_remove, +}; + +module_platform_driver(bluefield_edac_mc_driver); + +MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 64922c8fa7e3..e6fd079783bd 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = { module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); -unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, - unsigned len) +unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned int len) { struct mem_ctl_info *mci = dimm->mci; int i, n, count = 0; @@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types); * At return, the pointer 'p' will be incremented to be used on a next call * to this function. 
*/ -void *edac_align_ptr(void **p, unsigned size, int n_elems) +void *edac_align_ptr(void **p, unsigned int size, int n_elems) { - unsigned align, r; + unsigned int align, r; void *ptr = *p; *p += size * n_elems; @@ -275,38 +275,37 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems) static void _edac_mc_free(struct mem_ctl_info *mci) { - int i, chn, row; struct csrow_info *csr; - const unsigned int tot_dimms = mci->tot_dimms; - const unsigned int tot_channels = mci->num_cschannel; - const unsigned int tot_csrows = mci->nr_csrows; + int i, chn, row; if (mci->dimms) { - for (i = 0; i < tot_dimms; i++) + for (i = 0; i < mci->tot_dimms; i++) kfree(mci->dimms[i]); kfree(mci->dimms); } + if (mci->csrows) { - for (row = 0; row < tot_csrows; row++) { + for (row = 0; row < mci->nr_csrows; row++) { csr = mci->csrows[row]; - if (csr) { - if (csr->channels) { - for (chn = 0; chn < tot_channels; chn++) - kfree(csr->channels[chn]); - kfree(csr->channels); - } - kfree(csr); + if (!csr) + continue; + + if (csr->channels) { + for (chn = 0; chn < mci->num_cschannel; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); } + kfree(csr); } kfree(mci->csrows); } kfree(mci); } -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt) + unsigned int sz_pvt) { struct mem_ctl_info *mci; struct edac_mc_layer *layer; @@ -314,9 +313,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, struct rank_info *chan; struct dimm_info *dimm; u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; - unsigned pos[EDAC_MAX_LAYERS]; - unsigned size, tot_dimms = 1, count = 1; - unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; + unsigned int pos[EDAC_MAX_LAYERS]; + unsigned int size, tot_dimms = 1, count = 1; + unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0; void *pvt, *p, *ptr = NULL; int i, j, row, chn, n, len, off; bool per_rank = false; @@ -1235,9 +1234,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, if (p > e->location) *(p - 1) = '\0'; - /* Report the error via the trace interface */ - grain_bits = fls_long(e->grain) + 1; + /* Sanity-check driver-supplied grain value. 
*/ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + grain_bits = fls_long(e->grain - 1); + + /* Report the error via the trace interface */ if (IS_ENABLED(CONFIG_RAS)) trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h index 4165e15995ad..02aac5c61d00 100644 --- a/drivers/edac/edac_mc.h +++ b/drivers/edac/edac_mc.h @@ -122,10 +122,10 @@ do { \ * On success, return a pointer to struct mem_ctl_info pointer; * %NULL otherwise */ -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt); + unsigned int sz_pvt); /** * edac_get_owner - Return the owner's mod_name of EDAC MC diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4386ea4b9b5a..32d016f1ecd1 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -131,7 +131,7 @@ static const char * const edac_caps[] = { struct dev_ch_attribute { struct device_attribute attr; - int channel; + unsigned int channel; }; #define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ @@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; /* if field has not been initialized, there is nothing to send */ @@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev, const char *data, size_t count) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; size_t copy_count = count; @@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev, struct device_attribute *mattr, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; return sprintf(data, "%u\n", rank->ce_count); @@ -278,7 +278,7 @@ static void csrow_attr_release(struct device *dev) { struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(csrow); } @@ -414,14 +414,16 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, dev_set_name(&csrow->dev, "csrow%d", index); dev_set_drvdata(&csrow->dev, csrow); - edac_dbg(0, "creating (virtual) csrow node %s\n", - dev_name(&csrow->dev)); - err = device_add(&csrow->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); put_device(&csrow->dev); + return err; + } - return err; + edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); + + return 0; } /* Create a CSROW object under specifed edac_mc_device */ @@ -435,12 +437,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) { - edac_dbg(1, - "failure: create csrow objects for csrow %d\n", - i); + if (err < 0) goto error; - } } return 0; @@ -624,7 +622,7 @@ static void dimm_attr_release(struct device *dev) { struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev)); + 
edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dimm); } @@ -653,12 +651,21 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, pm_runtime_forbid(&mci->dev); err = device_add(&dimm->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev)); put_device(&dimm->dev); + return err; + } - edac_dbg(0, "created rank/dimm device %s\n", dev_name(&dimm->dev)); + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + char location[80]; - return err; + edac_dimm_info_location(dimm, location, sizeof(location)); + edac_dbg(0, "device %s created at location %s\n", + dev_name(&dimm->dev), location); + } + + return 0; } /* @@ -901,7 +908,7 @@ static void mci_attr_release(struct device *dev) { struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(mci); } @@ -933,14 +940,15 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); - edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); put_device(&mci->dev); - goto out; + return err; } + edac_dbg(0, "device %s created\n", dev_name(&mci->dev)); + /* * Create the dimm/rank devices */ @@ -950,22 +958,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, if (!dimm->nr_pages) continue; -#ifdef CONFIG_EDAC_DEBUG - edac_dbg(1, "creating dimm%d, located at ", i); - if (edac_debug_level >= 1) { - int lay; - for (lay = 0; lay < mci->n_layers; lay++) - printk(KERN_CONT "%s %d ", - edac_layer_name[mci->layers[lay].type], - dimm->location[lay]); - printk(KERN_CONT "\n"); - } -#endif err = edac_create_dimm_object(mci, dimm, i); - if (err) { - edac_dbg(1, "failure: create dimm %d obj\n", i); + if (err) goto fail_unregister_dimm; - } } #ifdef CONFIG_EDAC_LEGACY_SYSFS @@ -987,7 +982,6 @@ fail_unregister_dimm: } device_unregister(&mci->dev); -out: return err; } @@ -1011,14 +1005,14 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) struct dimm_info *dimm = mci->dimms[i]; if (dimm->nr_pages == 0) continue; - edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev)); device_unregister(&dimm->dev); } } void edac_unregister_sysfs(struct mem_ctl_info *mci) { - edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); } @@ -1029,7 +1023,7 @@ static void mc_attr_release(struct device *dev) * parent device, used to create the /sys/devices/mc sysfs node. * So, there are no attributes on it. 
*/ - edac_dbg(1, "Releasing device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dev); } @@ -1044,10 +1038,8 @@ int __init edac_mc_sysfs_init(void) int err; mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); - if (!mci_pdev) { - err = -ENOMEM; - goto out; - } + if (!mci_pdev) + return -ENOMEM; mci_pdev->bus = edac_get_sysfs_subsys(); mci_pdev->type = &mc_attr_type; @@ -1055,17 +1047,15 @@ int __init edac_mc_sysfs_init(void) dev_set_name(mci_pdev, "mc"); err = device_add(mci_pdev); - if (err < 0) - goto out_put_device; + if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev)); + put_device(mci_pdev); + return err; + } edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; - - out_put_device: - put_device(mci_pdev); - out: - return err; } void edac_mc_sysfs_exit(void) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 7f19f1c672c3..d413a0bdc9ad 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -68,7 +68,7 @@ struct memdev_dmi_entry { struct ghes_edac_dimm_fill { struct mem_ctl_info *mci; - unsigned count; + unsigned int count; }; static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index b506eef6b146..251f2b692785 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -417,7 +417,8 @@ static const char *i5100_err_msg(unsigned err) } /* convert csrow index into a rank (per channel -- 0..5) */ -static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -425,7 +426,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) } /* convert csrow index into a channel (0..1) */ -static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -653,11 +655,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor, return ret; } -static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow) +static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow) { struct i5100_priv *priv = mci->pvt_info; - const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow); - const unsigned chan = i5100_csrow_to_chan(mci, csrow); + const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow); + const unsigned int chan = i5100_csrow_to_chan(mci, csrow); unsigned addr_lines; /* dimm present? 
*/ @@ -852,8 +854,8 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) for (i = 0; i < mci->tot_dimms; i++) { struct dimm_info *dimm; const unsigned long npages = i5100_npages(mci, i); - const unsigned chan = i5100_csrow_to_chan(mci, i); - const unsigned rank = i5100_csrow_to_rank(mci, i); + const unsigned int chan = i5100_csrow_to_chan(mci, i); + const unsigned int rank = i5100_csrow_to_rank(mci, i); if (!npages) continue; diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index a6846be0d2eb..b1193be1ef1d 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -260,11 +260,14 @@ static u64 get_sideband_reg_base_addr(void) } } +#define DNV_MCHBAR_SIZE 0x8000 +#define DNV_SB_PORT_SIZE 0x10000 static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) { struct pci_dev *pdev; char *base; u64 addr; + unsigned long size; if (op == 4) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); @@ -279,15 +282,17 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na addr = get_mem_ctrl_hub_base_addr(); if (!addr) return -ENODEV; + size = DNV_MCHBAR_SIZE; } else { /* MMIO via sideband register base address */ addr = get_sideband_reg_base_addr(); if (!addr) return -ENODEV; addr += (port << 16); + size = DNV_SB_PORT_SIZE; } - base = ioremap((resource_size_t)addr, 0x10000); + base = ioremap((resource_size_t)addr, size); if (!base) return -ENODEV; |
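
The chip-select sizing rework in amd64_edac.c above replaces the old base/mask subtraction with address-mask deinterleaving: zero bits inside a DCSM address mask come from chip-select interleaving, so they are counted and stripped off the top of the mask before it is converted to a size (DCSM[31:1] maps to address bits [39:9], hence the ">> 2" to get kB). A standalone sketch of that arithmetic — plain C with minimal stand-ins for fls()/hweight_long() and made-up mask values, not the kernel implementation:

#include <stdio.h>
#include <stdint.h>

/* 1-based index of the highest set bit, 0 for 0 (stand-in for the kernel's fls()). */
static int fls32(uint32_t x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

/* Population count (stand-in for the kernel's hweight_long()). */
static int hweight32(uint32_t x)
{
	int w = 0;

	while (x) {
		w += x & 1;
		x >>= 1;
	}
	return w;
}

/* Mirror of the mask maths in f17_addr_mask_to_cs_size(): raw mask in, size in MB out. */
static unsigned int addr_mask_to_cs_size(uint32_t addr_mask_orig)
{
	int msb = fls32(addr_mask_orig) - 1;
	int weight = hweight32(addr_mask_orig);
	int num_zero_bits = msb - weight;
	uint32_t addr_mask_deinterleaved;
	uint64_t size_kb;

	/* GENMASK(msb - num_zero_bits, 1): contiguous mask with the interleave holes removed. */
	addr_mask_deinterleaved = (uint32_t)(((1ULL << (msb - num_zero_bits + 1)) - 1) & ~1ULL);

	/* Register [31:1] = Address [39:9], so the size in kB is (mask >> 2) + 1. */
	size_kb = (addr_mask_deinterleaved >> 2) + 1;

	return (unsigned int)(size_kb >> 10);	/* kB -> MB */
}

int main(void)
{
	/* Hypothetical mask values, only to exercise the arithmetic. */
	uint32_t masks[] = { 0x03fffffe, 0x07ffeffe };

	for (unsigned int i = 0; i < sizeof(masks) / sizeof(masks[0]); i++)
		printf("mask 0x%08x -> %u MB\n",
		       (unsigned int)masks[i], addr_mask_to_cs_size(masks[i]));

	return 0;
}

Both example masks report 16384 MB: the second one has an interleave hole at bit 12 plus one extra high bit, and the deinterleaving collapses it back to the same per-chip-select size as the contiguous first mask.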