diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-16 23:38:45 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-16 23:38:45 +0300 |
commit | ff881842e12563f25d69a4e2e373f1ad392a7027 (patch) | |
tree | 8dc850f99c3d42b32fb953d72ae9551cf6741ecb /drivers | |
parent | a7bd4bcf138e7ec95c00d55fee158f6be378029b (diff) | |
parent | 3e443eb353eda6f4b4796e07f2599683fa752f1d (diff) | |
download | linux-ff881842e12563f25d69a4e2e373f1ad392a7027.tar.xz |
Merge tag 'edac_for_5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
"The new thing this time around is that we have three maintainers now
and a new, old repo. New because it is new for the EDAC tree, which is
hosted there from now on, and old because it is Tony's and my old
RAS repo, which we still use occasionally when the stuff isn't in tip.
Summary:
- EDAC tree has three maintainers and one new designated reviewer
now, so that the work can scale better.
- New driver for Mellanox' BlueField SoC DDR controller (Shravan
Kumar Ramani)
- AMD Rome support in amd64_edac (Yazen Ghannam and Isaac Vaughn)
- Misc fixes, cleanups and code improvements"
* tag 'edac_for_5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
EDAC/amd64: Add PCI device IDs for family 17h, model 70h
MAINTAINERS: Add Robert as a EDAC reviewer
EDAC/mc_sysfs: Make debug messages consistent
EDAC/mc_sysfs: Remove pointless gotos
EDAC: Prefer 'unsigned int' to bare use of 'unsigned'
EDAC/amd64: Support asymmetric dual-rank DIMMs
EDAC/amd64: Cache secondary Chip Select registers
EDAC/amd64: Decode syndrome before translating address
EDAC/amd64: Find Chip Select memory size using Address Mask
EDAC/amd64: Initialize DIMM info for systems with more than two channels
EDAC/amd64: Recognize DRAM device type ECC capability
EDAC/amd64: Support more than two controllers for chip selects handling
EDAC/mc: Cleanup _edac_mc_free() code
EDAC, pnd2: Fix ioremap() size in dnv_rd_reg()
EDAC, mellanox: Add ECC support for BlueField DDR4
EDAC/altera: Use the proper type for the IRQ status bits
EDAC/mc: Fix grain_bits calculation
edac: altera: Move Stratix10 SDRAM ECC to peripheral
MAINTAINERS: update EDAC entry to reflect current tree and maintainers
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/edac/Kconfig | 7 | ||||
-rw-r--r-- | drivers/edac/Makefile | 1 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 58 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 25 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 371 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 15 | ||||
-rw-r--r-- | drivers/edac/bluefield_edac.c | 356 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 53 | ||||
-rw-r--r-- | drivers/edac/edac_mc.h | 6 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 92 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 16 | ||||
-rw-r--r-- | drivers/edac/pnd2_edac.c | 7 |
13 files changed, 790 insertions, 219 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 200c04ce5b0e..2a2603bfb918 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -510,4 +510,11 @@ config EDAC_ASPEED First, ECC must be configured in the bootloader. Then, this driver will expose error counters via the EDAC kernel framework. +config EDAC_BLUEFIELD + tristate "Mellanox BlueField Memory ECC" + depends on ARM64 && ((MELLANOX_PLATFORM && ACPI) || COMPILE_TEST) + help + Support for error detection and correction on the + Mellanox BlueField SoCs. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 165ca65e1a3a..d265ff9311f0 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -85,3 +85,4 @@ obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o obj-$(CONFIG_EDAC_TI) += ti_edac.o obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o +obj-$(CONFIG_EDAC_BLUEFIELD) += bluefield_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c2e693e34d43..fbda4b876afd 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -222,7 +222,6 @@ static unsigned long get_total_mem(void) static const struct of_device_id altr_sdram_ctrl_of_match[] = { { .compatible = "altr,sdram-edac", .data = &c5_data}, { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, - { .compatible = "altr,sdram-edac-s10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -1170,6 +1169,24 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) return 0; } +/*********************** SDRAM EDAC Device Functions *********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDRAM + +static const struct edac_device_prv_data s10_sdramecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_S10_ECC_SERRPENA, + .ue_clear_mask = ALTR_S10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_S10_ECC_EN, + .ecc_en_ofst = ALTR_S10_ECC_CTRL_SDRAM_OFST, + .ce_set_mask = ALTR_S10_ECC_TSERRA, + 
.ue_set_mask = ALTR_S10_ECC_TDERRA, + .set_err_ofst = ALTR_S10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + /*********************** OCRAM EDAC Device Functions *********************/ #ifdef CONFIG_EDAC_ALTERA_OCRAM @@ -1759,6 +1776,9 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_SDMMC { .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data }, #endif +#ifdef CONFIG_EDAC_ALTERA_SDRAM + { .compatible = "altr,sdram-edac-s10", .data = &s10_sdramecc_data }, +#endif {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); @@ -1866,6 +1886,7 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) struct altr_arria10_edac *edac = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); int irq = irq_desc_get_irq(desc); + unsigned long bits; dberr = (irq == edac->db_irq) ? 1 : 0; sm_offset = dberr ? 
A10_SYSMGR_ECC_INTSTAT_DERR_OFST : @@ -1875,7 +1896,8 @@ static void altr_edac_a10_irq_handler(struct irq_desc *desc) regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status); - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { + bits = irq_status; + for_each_set_bit(bit, &bits, 32) { irq = irq_linear_revmap(edac->domain, dberr * 32 + bit); if (irq) generic_handle_irq(irq); @@ -1889,6 +1911,10 @@ static int validate_parent_available(struct device_node *np) struct device_node *parent; int ret = 0; + /* SDRAM must be present for Linux (implied parent) */ + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + return 0; + /* Ensure parent device is enabled if parent node exists */ parent = of_parse_phandle(np, "altr,ecc-parent", 0); if (parent && !of_device_is_available(parent)) @@ -1898,6 +1924,22 @@ static int validate_parent_available(struct device_node *np) return ret; } +static int get_s10_sdram_edac_resource(struct device_node *np, + struct resource *res) +{ + struct device_node *parent; + int ret; + + parent = of_parse_phandle(np, "altr,sdr-syscon", 0); + if (!parent) + return -ENODEV; + + ret = of_address_to_resource(parent, 0, res); + of_node_put(parent); + + return ret; +} + static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, struct device_node *np) { @@ -1925,7 +1967,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL)) return -ENOMEM; - rc = of_address_to_resource(np, 0, &res); + if (of_device_is_compatible(np, "altr,sdram-edac-s10")) + rc = get_s10_sdram_edac_resource(np, &res); + else + rc = of_address_to_resource(np, 0, &res); + if (rc < 0) { edac_printk(KERN_ERR, EDAC_DEVICE, "%s: no resource address\n", ecc_name); @@ -2231,13 +2277,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev) of_device_is_compatible(child, "altr,socfpga-dma-ecc") || of_device_is_compatible(child, "altr,socfpga-usb-ecc") || 
of_device_is_compatible(child, "altr,socfpga-qspi-ecc") || +#ifdef CONFIG_EDAC_ALTERA_SDRAM + of_device_is_compatible(child, "altr,sdram-edac-s10") || +#endif of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc")) altr_edac_a10_device_add(edac, child); #ifdef CONFIG_EDAC_ALTERA_SDRAM - else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || - (of_device_is_compatible(child, "altr,sdram-edac-s10"))) + else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 55654cc4bcdf..3727e72c8c2e 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -289,6 +289,29 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 /************* Stratix10 Defines **************/ +#define ALTR_S10_ECC_CTRL_SDRAM_OFST 0x00 +#define ALTR_S10_ECC_EN BIT(0) + +#define ALTR_S10_ECC_ERRINTEN_OFST 0x10 +#define ALTR_S10_ECC_ERRINTENS_OFST 0x14 +#define ALTR_S10_ECC_ERRINTENR_OFST 0x18 +#define ALTR_S10_ECC_SERRINTEN BIT(0) + +#define ALTR_S10_ECC_INTMODE_OFST 0x1C +#define ALTR_S10_ECC_INTMODE BIT(0) + +#define ALTR_S10_ECC_INTSTAT_OFST 0x20 +#define ALTR_S10_ECC_SERRPENA BIT(0) +#define ALTR_S10_ECC_DERRPENA BIT(8) +#define ALTR_S10_ECC_ERRPENA_MASK (ALTR_S10_ECC_SERRPENA | \ + ALTR_S10_ECC_DERRPENA) + +#define ALTR_S10_ECC_INTTEST_OFST 0x24 +#define ALTR_S10_ECC_TSERRA BIT(0) +#define ALTR_S10_ECC_TDERRA BIT(8) +#define ALTR_S10_ECC_TSERRB BIT(16) +#define ALTR_S10_ECC_TDERRB BIT(24) + #define ALTR_S10_DERR_ADDRA_OFST 0x2C /* Stratix10 ECC Manager Defines */ @@ -300,7 +323,7 @@ struct altr_sdram_mc_data { #define S10_SYSMGR_UE_ADDR_OFST 0x224 #define S10_DDR0_IRQ_MASK BIT(16) -#define S10_DBE_IRQ_MASK 0x3FE +#define S10_DBE_IRQ_MASK 0x3FFFE /* Define ECC Block Offsets for peripherals */ #define ECC_BLK_ADDRESS_OFST 0x40 diff --git a/drivers/edac/amd64_edac.c 
b/drivers/edac/amd64_edac.c index 873437be86d9..c1d4536ae466 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -788,51 +788,45 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) (dclr & BIT(15)) ? "yes" : "no"); } -/* - * The Address Mask should be a contiguous set of bits in the non-interleaved - * case. So to check for CS interleaving, find the most- and least-significant - * bits of the mask, generate a contiguous bitmask, and compare the two. - */ -static bool f17_cs_interleaved(struct amd64_pvt *pvt, u8 ctrl, int cs) +#define CS_EVEN_PRIMARY BIT(0) +#define CS_ODD_PRIMARY BIT(1) +#define CS_EVEN_SECONDARY BIT(2) +#define CS_ODD_SECONDARY BIT(3) + +#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) +#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) + +static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { - u32 mask = pvt->csels[ctrl].csmasks[cs >> 1]; - u32 msb = fls(mask) - 1, lsb = ffs(mask) - 1; - u32 test_mask = GENMASK(msb, lsb); + int cs_mode = 0; - edac_dbg(1, "mask=0x%08x test_mask=0x%08x\n", mask, test_mask); + if (csrow_enabled(2 * dimm, ctrl, pvt)) + cs_mode |= CS_EVEN_PRIMARY; - return mask ^ test_mask; + if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_PRIMARY; + + /* Asymmetric dual-rank DIMM support. 
*/ + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; + + return cs_mode; } static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) { - int dimm, size0, size1, cs0, cs1; + int dimm, size0, size1, cs0, cs1, cs_mode; edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl); - for (dimm = 0; dimm < 4; dimm++) { - size0 = 0; + for (dimm = 0; dimm < 2; dimm++) { cs0 = dimm * 2; - - if (csrow_enabled(cs0, ctrl, pvt)) - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs0); - - size1 = 0; cs1 = dimm * 2 + 1; - if (csrow_enabled(cs1, ctrl, pvt)) { - /* - * CS interleaving is only supported if both CSes have - * the same amount of memory. Because they are - * interleaved, it will look like both CSes have the - * full amount of memory. Save the size for both as - * half the amount we found on CS0, if interleaved. - */ - if (f17_cs_interleaved(pvt, ctrl, cs1)) - size1 = size0 = (size0 >> 1); - else - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, 0, cs1); - } + cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); + + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -942,89 +936,119 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; + } else if (pvt->fam >= 0x17) { + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = 2; + } + } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void read_umc_base_mask(struct amd64_pvt *pvt) +{ + u32 umc_base_reg, umc_base_reg_sec; + u32 umc_mask_reg, umc_mask_reg_sec; + u32 base_reg, base_reg_sec; + u32 mask_reg, mask_reg_sec; + u32 *base, *base_sec; + u32 *mask, *mask_sec; + int cs, umc; + + for_each_umc(umc) { + umc_base_reg = 
get_umc_base(umc) + UMCCH_BASE_ADDR; + umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; + + for_each_chip_select(cs, umc, pvt) { + base = &pvt->csels[umc].csbases[cs]; + base_sec = &pvt->csels[umc].csbases_sec[cs]; + + base_reg = umc_base_reg + (cs * 4); + base_reg_sec = umc_base_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) + edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) + edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base_sec, base_reg_sec); + } + + umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + + for_each_chip_select_mask(cs, umc, pvt) { + mask = &pvt->csels[umc].csmasks[cs]; + mask_sec = &pvt->csels[umc].csmasks_sec[cs]; + + mask_reg = umc_mask_reg + (cs * 4); + mask_reg_sec = umc_mask_reg_sec + (cs * 4); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) + edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask, mask_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) + edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask_sec, mask_reg_sec); + } + } +} + /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ static void read_dct_base_mask(struct amd64_pvt *pvt) { - int base_reg0, base_reg1, mask_reg0, mask_reg1, cs; + int cs; prep_chip_selects(pvt); - if (pvt->umc) { - base_reg0 = get_umc_base(0) + UMCCH_BASE_ADDR; - base_reg1 = get_umc_base(1) + UMCCH_BASE_ADDR; - mask_reg0 = get_umc_base(0) + UMCCH_ADDR_MASK; - mask_reg1 = get_umc_base(1) + UMCCH_ADDR_MASK; - } else { - base_reg0 = DCSB0; - base_reg1 = DCSB1; - mask_reg0 = DCSM0; - mask_reg1 = DCSM1; - } + if (pvt->umc) + return read_umc_base_mask(pvt); for_each_chip_select(cs, 0, pvt) { - int reg0 = base_reg0 + (cs * 4); - int reg1 = base_reg1 + (cs * 4); + int reg0 = DCSB0 + (cs * 4); + int reg1 = DCSB1 + (cs * 4); u32 
*base0 = &pvt->csels[0].csbases[cs]; u32 *base1 = &pvt->csels[1].csbases[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: 0x%x\n", - cs, *base0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) + edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", + cs, *base0, reg0); - if (!amd_smn_read(pvt->mc_node_id, reg1, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: 0x%x\n", - cs, *base1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, base0)) - edac_dbg(0, " DCSB0[%d]=0x%08x reg: F2x%x\n", - cs, *base0, reg0); - - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) - edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", - cs, *base1, (pvt->fam == 0x10) ? reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, base1)) + edac_dbg(0, " DCSB1[%d]=0x%08x reg: F2x%x\n", + cs, *base1, (pvt->fam == 0x10) ? reg1 + : reg0); } for_each_chip_select_mask(cs, 0, pvt) { - int reg0 = mask_reg0 + (cs * 4); - int reg1 = mask_reg1 + (cs * 4); + int reg0 = DCSM0 + (cs * 4); + int reg1 = DCSM1 + (cs * 4); u32 *mask0 = &pvt->csels[0].csmasks[cs]; u32 *mask1 = &pvt->csels[1].csmasks[cs]; - if (pvt->umc) { - if (!amd_smn_read(pvt->mc_node_id, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: 0x%x\n", - cs, *mask0, reg0); - - if (!amd_smn_read(pvt->mc_node_id, reg1, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: 0x%x\n", - cs, *mask1, reg1); - } else { - if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) - edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", - cs, *mask0, reg0); + if (!amd64_read_dct_pci_cfg(pvt, 0, reg0, mask0)) + edac_dbg(0, " DCSM0[%d]=0x%08x reg: F2x%x\n", + cs, *mask0, reg0); - if (pvt->fam == 0xf) - continue; + if (pvt->fam == 0xf) + continue; - if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) - edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", - cs, *mask1, (pvt->fam == 0x10) ? 
reg1 - : reg0); - } + if (!amd64_read_dct_pci_cfg(pvt, 1, reg0, mask1)) + edac_dbg(0, " DCSM1[%d]=0x%08x reg: F2x%x\n", + cs, *mask1, (pvt->fam == 0x10) ? reg1 + : reg0); } } @@ -1556,18 +1580,58 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } -static int f17_base_addr_to_cs_size(struct amd64_pvt *pvt, u8 umc, +static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, unsigned int cs_mode, int csrow_nr) { - u32 base_addr = pvt->csels[umc].csbases[csrow_nr]; + u32 addr_mask_orig, addr_mask_deinterleaved; + u32 msb, weight, num_zero_bits; + int dimm, size = 0; - /* Each mask is used for every two base addresses. */ - u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr >> 1]; + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; - /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - u32 size = ((addr_mask >> 1) - (base_addr >> 1) + 1) >> 1; + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; - edac_dbg(1, "BaseAddr: 0x%x, AddrMask: 0x%x\n", base_addr, addr_mask); + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; + + /* + * There is one mask per DIMM, and two Chip Selects per DIMM. + * CS0 and CS1 -> DIMM0 + * CS2 and CS3 -> DIMM1 + */ + dimm = csrow_nr >> 1; + + /* Asymmetric dual-rank DIMM support. */ + if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) + addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + else + addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. 
+ */ + msb = fls(addr_mask_orig) - 1; + weight = hweight_long(addr_mask_orig); + num_zero_bits = msb - weight; + + /* Take the number of zero bits off from the top of the mask. */ + addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = (addr_mask_deinterleaved >> 2) + 1; /* Return size in MBs. */ return size >> 10; @@ -2232,7 +2296,7 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M10H_CPUS] = { @@ -2241,7 +2305,7 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, [F17_M30H_CPUS] = { @@ -2250,7 +2314,16 @@ static struct amd64_family_type family_types[] = { .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, .ops = { .early_channel_count = f17_early_channel_count, - .dbam_to_cs = f17_base_addr_to_cs_size, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, + [F17_M70H_CPUS] = { + .ctl_name = "F17h_M70h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, } }, }; @@ -2537,13 +2610,6 @@ static void decode_umc_error(int node_id, struct mce *m) err.channel = find_umc_channel(m); - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { - err.err_code = ERR_NORM_ADDR; - goto log_error; - } - - error_address_to_page_and_offset(sys_addr, &err); - if (!(m->status & 
MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -2560,6 +2626,13 @@ static void decode_umc_error(int node_id, struct mce *m) err.csrow = m->synd & 0x7; + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } + + error_address_to_page_and_offset(sys_addr, &err); + log_error: __log_ecc_error(mci, &err, ecc_type); } @@ -2809,10 +2882,12 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) int csrow_nr = csrow_nr_orig; u32 cs_mode, nr_pages; - if (!pvt->umc) + if (!pvt->umc) { csrow_nr >>= 1; - - cs_mode = DBAM_DIMM(csrow_nr, dbam); + cs_mode = DBAM_DIMM(csrow_nr, dbam); + } else { + cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); + } nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; @@ -2824,6 +2899,49 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) return nr_pages; } +static int init_csrows_df(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + enum edac_type edac_mode = EDAC_NONE; + enum dev_type dev_type = DEV_UNKNOWN; + struct dimm_info *dimm; + int empty = 1; + u8 umc, cs; + + if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { + edac_mode = EDAC_S16ECD16ED; + dev_type = DEV_X16; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S8ECD8ED) { + edac_mode = EDAC_S8ECD8ED; + dev_type = DEV_X8; + } else if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) { + edac_mode = EDAC_S4ECD4ED; + dev_type = DEV_X4; + } else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) { + edac_mode = EDAC_SECDED; + } + + for_each_umc(umc) { + for_each_chip_select(cs, umc, pvt) { + if (!csrow_enabled(cs, umc, pvt)) + continue; + + empty = 0; + dimm = mci->csrows[cs]->channels[umc]->dimm; + + edac_dbg(1, "MC node: %d, csrow: %d\n", + pvt->mc_node_id, cs); + + dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); + dimm->mtype = pvt->dram_type; + dimm->edac_mode = edac_mode; + dimm->dtype = 
dev_type; + } + } + + return empty; +} + /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. @@ -2838,15 +2956,16 @@ static int init_csrows(struct mem_ctl_info *mci) int nr_pages = 0; u32 val; - if (!pvt->umc) { - amd64_read_pci_cfg(pvt->F3, NBCFG, &val); + if (pvt->umc) + return init_csrows_df(mci); - pvt->nbcfg = val; + amd64_read_pci_cfg(pvt->F3, NBCFG, &val); - edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", - pvt->mc_node_id, val, - !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); - } + pvt->nbcfg = val; + + edac_dbg(0, "node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", + pvt->mc_node_id, val, + !!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE)); /* * We iterate over DCT0 here but we look at DCT1 in parallel, if needed. @@ -2883,13 +3002,7 @@ static int init_csrows(struct mem_ctl_info *mci) edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* Determine DIMM ECC mode: */ - if (pvt->umc) { - if (mci->edac_ctl_cap & EDAC_FLAG_S4ECD4ED) - edac_mode = EDAC_S4ECD4ED; - else if (mci->edac_ctl_cap & EDAC_FLAG_SECDED) - edac_mode = EDAC_SECDED; - - } else if (pvt->nbcfg & NBCFG_ECC_ENABLE) { + if (pvt->nbcfg & NBCFG_ECC_ENABLE) { edac_mode = (pvt->nbcfg & NBCFG_CHIPKILL) ? 
EDAC_S4ECD4ED : EDAC_SECDED; @@ -3137,12 +3250,15 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) static inline void f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { - u8 i, ecc_en = 1, cpk_en = 1; + u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; for_each_umc(i) { if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { ecc_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_ENABLED); cpk_en &= !!(pvt->umc[i].umc_cap_hi & UMC_ECC_CHIPKILL_CAP); + + dev_x4 &= !!(pvt->umc[i].dimm_cfg & BIT(6)); + dev_x16 &= !!(pvt->umc[i].dimm_cfg & BIT(7)); } } @@ -3150,8 +3266,15 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) if (ecc_en) { mci->edac_ctl_cap |= EDAC_FLAG_SECDED; - if (cpk_en) + if (!cpk_en) + return; + + if (dev_x4) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; + else if (dev_x16) + mci->edac_ctl_cap |= EDAC_FLAG_S16ECD16ED; + else + mci->edac_ctl_cap |= EDAC_FLAG_S8ECD8ED; } } @@ -3241,6 +3364,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M30H_CPUS]; pvt->ops = &family_types[F17_M30H_CPUS].ops; break; + } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + break; } /* fall through */ case 0x18: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8f66472f7adc..8c3cda81e619 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,6 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 +#define NUM_CONTROLLERS 8 #define ON true #define OFF false @@ -119,6 +120,8 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 /* * Function 1 - Address Map @@ -168,7 +171,8 @@ 
#define DCSM0 0x60 #define DCSM1 0x160 -#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE) #define DRAM_CONTROL 0x78 @@ -258,7 +262,9 @@ /* UMC CH register offsets */ #define UMCCH_BASE_ADDR 0x0 +#define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 +#define UMCCH_ADDR_MASK_SEC 0x28 #define UMCCH_ADDR_CFG 0x30 #define UMCCH_DIMM_CFG 0x80 #define UMCCH_UMC_CFG 0x100 @@ -285,6 +291,7 @@ enum amd_families { F17_CPUS, F17_M10H_CPUS, F17_M30H_CPUS, + F17_M70H_CPUS, NUM_FAMILIES, }; @@ -311,9 +318,11 @@ struct dram_range { /* A DCT chip selects collection */ struct chip_select { u32 csbases[NUM_CHIPSELECTS]; + u32 csbases_sec[NUM_CHIPSELECTS]; u8 b_cnt; u32 csmasks[NUM_CHIPSELECTS]; + u32 csmasks_sec[NUM_CHIPSELECTS]; u8 m_cnt; }; @@ -351,8 +360,8 @@ struct amd64_pvt { u32 dbam0; /* DRAM Base Address Mapping reg for DCT0 */ u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ - /* one for each DCT */ - struct chip_select csels[2]; + /* one for each DCT/UMC */ + struct chip_select csels[NUM_CONTROLLERS]; /* DRAM base and limit pairs F1x[78,70,68,60,58,50,48,40] */ struct dram_range ranges[DRAM_RANGES]; diff --git a/drivers/edac/bluefield_edac.c b/drivers/edac/bluefield_edac.c new file mode 100644 index 000000000000..e4736eb37bfb --- /dev/null +++ b/drivers/edac/bluefield_edac.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Bluefield-specific EDAC driver. + * + * Copyright (c) 2019 Mellanox Technologies. 
+ */ + +#include <linux/acpi.h> +#include <linux/arm-smccc.h> +#include <linux/bitfield.h> +#include <linux/edac.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> + +#include "edac_module.h" + +#define DRIVER_NAME "bluefield-edac" + +/* + * Mellanox BlueField EMI (External Memory Interface) register definitions. + */ + +#define MLXBF_ECC_CNT 0x340 +#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) +#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) + +#define MLXBF_ECC_ERR 0x348 +#define MLXBF_ECC_ERR__SECC BIT(0) +#define MLXBF_ECC_ERR__DECC BIT(16) + +#define MLXBF_ECC_LATCH_SEL 0x354 +#define MLXBF_ECC_LATCH_SEL__START BIT(24) + +#define MLXBF_ERR_ADDR_0 0x358 + +#define MLXBF_ERR_ADDR_1 0x37c + +#define MLXBF_SYNDROM 0x35c +#define MLXBF_SYNDROM__DERR BIT(0) +#define MLXBF_SYNDROM__SERR BIT(1) +#define MLXBF_SYNDROM__SYN GENMASK(25, 16) + +#define MLXBF_ADD_INFO 0x364 +#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) + +#define MLXBF_EDAC_MAX_DIMM_PER_MC 2 +#define MLXBF_EDAC_ERROR_GRAIN 8 + +/* + * Request MLNX_SIP_GET_DIMM_INFO + * + * Retrieve information about DIMM on a certain slot. + * + * Call register usage: + * a0: MLNX_SIP_GET_DIMM_INFO + * a1: (Memory controller index) << 16 | (Dimm index in memory controller) + * a2-7: not used. + * + * Return status: + * a0: MLXBF_DIMM_INFO defined below describing the DIMM. + * a1-3: not used. 
+ */ +#define MLNX_SIP_GET_DIMM_INFO 0x82000008 + +/* Format for the SMC response about the memory information */ +#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) +#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) +#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) +#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) +#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) +#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) + +struct bluefield_edac_priv { + int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; + void __iomem *emi_base; + int dimm_per_mc; +}; + +static u64 smc_call1(u64 smc_op, u64 smc_arg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + +/* + * Gather the ECC information from the External Memory Interface registers + * and report it to the edac handler. + */ +static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, + int error_cnt, + int is_single_ecc) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 dram_additional_info, err_prank, edea0, edea1; + u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; + enum hw_event_mc_err_type ecc_type; + u64 ecc_dimm_addr; + int ecc_dimm; + + ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : + HW_EVENT_ERR_UNCORRECTED; + + /* + * Tell the External Memory Interface to populate the relevant + * registers with information about the last ECC error occurrence. + */ + ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; + writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); + + /* + * Verify that the ECC reported info in the registers is of the + * same type as the one asked to report. If not, just report the + * error without the detailed information. 
+ */ + dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); + serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); + derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); + syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); + + if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { + edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, + 0, 0, -1, mci->ctl_name, ""); + return; + } + + dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); + err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); + + ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; + + edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); + edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); + + ecc_dimm_addr = ((u64)edea1 << 32) | edea0; + + edac_mc_handle_error(ecc_type, mci, error_cnt, + PFN_DOWN(ecc_dimm_addr), + offset_in_page(ecc_dimm_addr), + syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); +} + +static void bluefield_edac_check(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; + + /* + * The memory controller might not be initialized by the firmware + * when there isn't memory, which may lead to bad register readings. + */ + if (mci->edac_cap == EDAC_FLAG_NONE) + return; + + ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); + single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); + double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); + + if (single_error_count) { + ecc_error |= MLXBF_ECC_ERR__SECC; + + bluefield_gather_report_ecc(mci, single_error_count, 1); + } + + if (double_error_count) { + ecc_error |= MLXBF_ECC_ERR__DECC; + + bluefield_gather_report_ecc(mci, double_error_count, 0); + } + + /* Write to clear reported errors. */ + if (ecc_count) + writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); +} + +/* Initialize the DIMMs information for the given memory controller. 
*/ +static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) +{ + struct bluefield_edac_priv *priv = mci->pvt_info; + int mem_ctrl_idx = mci->mc_idx; + struct dimm_info *dimm; + u64 smc_info, smc_arg; + int is_empty = 1, i; + + for (i = 0; i < priv->dimm_per_mc; i++) { + dimm = mci->dimms[i]; + + smc_arg = mem_ctrl_idx << 16 | i; + smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); + + if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { + dimm->mtype = MEM_EMPTY; + continue; + } + + is_empty = 0; + + dimm->edac_mode = EDAC_SECDED; + + if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) + dimm->mtype = MEM_NVDIMM; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) + dimm->mtype = MEM_LRDDR4; + else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) + dimm->mtype = MEM_RDDR4; + else + dimm->mtype = MEM_DDR4; + + dimm->nr_pages = + FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * + (SZ_1G / PAGE_SIZE); + dimm->grain = MLXBF_EDAC_ERROR_GRAIN; + + /* Mem controller for BlueField only supports x4, x8 and x16 */ + switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { + case 4: + dimm->dtype = DEV_X4; + break; + case 8: + dimm->dtype = DEV_X8; + break; + case 16: + dimm->dtype = DEV_X16; + break; + default: + dimm->dtype = DEV_UNKNOWN; + } + + priv->dimm_ranks[i] = + FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); + } + + if (is_empty) + mci->edac_cap = EDAC_FLAG_NONE; + else + mci->edac_cap = EDAC_FLAG_SECDED; +} + +static int bluefield_edac_mc_probe(struct platform_device *pdev) +{ + struct bluefield_edac_priv *priv; + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[1]; + struct mem_ctl_info *mci; + struct resource *emi_res; + unsigned int mc_idx, dimm_count; + int rc, ret; + + /* Read the MSS (Memory SubSystem) index from ACPI table. */ + if (device_property_read_u32(dev, "mss_number", &mc_idx)) { + dev_warn(dev, "bf_edac: MSS number unknown\n"); + return -EINVAL; + } + + /* Read the DIMMs per MC from ACPI table. 
*/ + if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { + dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); + return -EINVAL; + } + + if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { + dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); + return -EINVAL; + } + + emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!emi_res) + return -EINVAL; + + layers[0].type = EDAC_MC_LAYER_SLOT; + layers[0].size = dimm_count; + layers[0].is_virt_csrow = true; + + mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); + if (!mci) + return -ENOMEM; + + priv = mci->pvt_info; + + priv->dimm_per_mc = dimm_count; + priv->emi_base = devm_ioremap_resource(dev, emi_res); + if (IS_ERR(priv->emi_base)) { + dev_err(dev, "failed to map EMI IO resource\n"); + ret = PTR_ERR(priv->emi_base); + goto err; + } + + mci->pdev = dev; + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | + MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + + mci->mod_name = DRIVER_NAME; + mci->ctl_name = "BlueField_Memory_Controller"; + mci->dev_name = dev_name(dev); + mci->edac_check = bluefield_edac_check; + + /* Initialize mci with the actual populated DIMM information. */ + bluefield_edac_init_dimms(mci); + + platform_set_drvdata(pdev, mci); + + /* Register with EDAC core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(dev, "failed to register with EDAC core\n"); + ret = rc; + goto err; + } + + /* Only POLL mode supported so far. 
*/ + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; + +err: + edac_mc_free(mci); + + return ret; + +} + +static int bluefield_edac_mc_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = platform_get_drvdata(pdev); + + edac_mc_del_mc(&pdev->dev); + edac_mc_free(mci); + + return 0; +} + +static const struct acpi_device_id bluefield_mc_acpi_ids[] = { + {"MLNXBF08", 0}, + {} +}; + +MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); + +static struct platform_driver bluefield_edac_mc_driver = { + .driver = { + .name = DRIVER_NAME, + .acpi_match_table = bluefield_mc_acpi_ids, + }, + .probe = bluefield_edac_mc_probe, + .remove = bluefield_edac_mc_remove, +}; + +module_platform_driver(bluefield_edac_mc_driver); + +MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 64922c8fa7e3..e6fd079783bd 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = { module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); -unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, - unsigned len) +unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, + unsigned int len) { struct mem_ctl_info *mci = dimm->mci; int i, n, count = 0; @@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types); * At return, the pointer 'p' will be incremented to be used on a next call * to this function. 
*/ -void *edac_align_ptr(void **p, unsigned size, int n_elems) +void *edac_align_ptr(void **p, unsigned int size, int n_elems) { - unsigned align, r; + unsigned int align, r; void *ptr = *p; *p += size * n_elems; @@ -275,38 +275,37 @@ void *edac_align_ptr(void **p, unsigned size, int n_elems) static void _edac_mc_free(struct mem_ctl_info *mci) { - int i, chn, row; struct csrow_info *csr; - const unsigned int tot_dimms = mci->tot_dimms; - const unsigned int tot_channels = mci->num_cschannel; - const unsigned int tot_csrows = mci->nr_csrows; + int i, chn, row; if (mci->dimms) { - for (i = 0; i < tot_dimms; i++) + for (i = 0; i < mci->tot_dimms; i++) kfree(mci->dimms[i]); kfree(mci->dimms); } + if (mci->csrows) { - for (row = 0; row < tot_csrows; row++) { + for (row = 0; row < mci->nr_csrows; row++) { csr = mci->csrows[row]; - if (csr) { - if (csr->channels) { - for (chn = 0; chn < tot_channels; chn++) - kfree(csr->channels[chn]); - kfree(csr->channels); - } - kfree(csr); + if (!csr) + continue; + + if (csr->channels) { + for (chn = 0; chn < mci->num_cschannel; chn++) + kfree(csr->channels[chn]); + kfree(csr->channels); } + kfree(csr); } kfree(mci->csrows); } kfree(mci); } -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt) + unsigned int sz_pvt) { struct mem_ctl_info *mci; struct edac_mc_layer *layer; @@ -314,9 +313,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, struct rank_info *chan; struct dimm_info *dimm; u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; - unsigned pos[EDAC_MAX_LAYERS]; - unsigned size, tot_dimms = 1, count = 1; - unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; + unsigned int pos[EDAC_MAX_LAYERS]; + unsigned int size, tot_dimms = 1, count = 1; + unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0; void *pvt, *p, *ptr = NULL; int i, j, row, chn, 
n, len, off; bool per_rank = false; @@ -1235,9 +1234,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, if (p > e->location) *(p - 1) = '\0'; - /* Report the error via the trace interface */ - grain_bits = fls_long(e->grain) + 1; + /* Sanity-check driver-supplied grain value. */ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + grain_bits = fls_long(e->grain - 1); + + /* Report the error via the trace interface */ if (IS_ENABLED(CONFIG_RAS)) trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h index 4165e15995ad..02aac5c61d00 100644 --- a/drivers/edac/edac_mc.h +++ b/drivers/edac/edac_mc.h @@ -122,10 +122,10 @@ do { \ * On success, return a pointer to struct mem_ctl_info pointer; * %NULL otherwise */ -struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, - unsigned n_layers, +struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num, + unsigned int n_layers, struct edac_mc_layer *layers, - unsigned sz_pvt); + unsigned int sz_pvt); /** * edac_get_owner - Return the owner's mod_name of EDAC MC diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 4386ea4b9b5a..32d016f1ecd1 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -131,7 +131,7 @@ static const char * const edac_caps[] = { struct dev_ch_attribute { struct device_attribute attr; - int channel; + unsigned int channel; }; #define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \ @@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; /* if field has not been initialized, there is nothing to send */ @@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev, const char *data, size_t count) { struct csrow_info *csrow = 
to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; size_t copy_count = count; @@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev, struct device_attribute *mattr, char *data) { struct csrow_info *csrow = to_csrow(dev); - unsigned chan = to_channel(mattr); + unsigned int chan = to_channel(mattr); struct rank_info *rank = csrow->channels[chan]; return sprintf(data, "%u\n", rank->ce_count); @@ -278,7 +278,7 @@ static void csrow_attr_release(struct device *dev) { struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(csrow); } @@ -414,14 +414,16 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, dev_set_name(&csrow->dev, "csrow%d", index); dev_set_drvdata(&csrow->dev, csrow); - edac_dbg(0, "creating (virtual) csrow node %s\n", - dev_name(&csrow->dev)); - err = device_add(&csrow->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&csrow->dev)); put_device(&csrow->dev); + return err; + } - return err; + edac_dbg(0, "device %s created\n", dev_name(&csrow->dev)); + + return 0; } /* Create a CSROW object under specifed edac_mc_device */ @@ -435,12 +437,8 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) { - edac_dbg(1, - "failure: create csrow objects for csrow %d\n", - i); + if (err < 0) goto error; - } } return 0; @@ -624,7 +622,7 @@ static void dimm_attr_release(struct device *dev) { struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - edac_dbg(1, "Releasing dimm device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dimm); } @@ -653,12 +651,21 @@ static int edac_create_dimm_object(struct mem_ctl_info 
*mci, pm_runtime_forbid(&mci->dev); err = device_add(&dimm->dev); - if (err) + if (err) { + edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev)); put_device(&dimm->dev); + return err; + } - edac_dbg(0, "created rank/dimm device %s\n", dev_name(&dimm->dev)); + if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { + char location[80]; - return err; + edac_dimm_info_location(dimm, location, sizeof(location)); + edac_dbg(0, "device %s created at location %s\n", + dev_name(&dimm->dev), location); + } + + return 0; } /* @@ -901,7 +908,7 @@ static void mci_attr_release(struct device *dev) { struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - edac_dbg(1, "Releasing csrow device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(mci); } @@ -933,14 +940,15 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); - edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); put_device(&mci->dev); - goto out; + return err; } + edac_dbg(0, "device %s created\n", dev_name(&mci->dev)); + /* * Create the dimm/rank devices */ @@ -950,22 +958,9 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, if (!dimm->nr_pages) continue; -#ifdef CONFIG_EDAC_DEBUG - edac_dbg(1, "creating dimm%d, located at ", i); - if (edac_debug_level >= 1) { - int lay; - for (lay = 0; lay < mci->n_layers; lay++) - printk(KERN_CONT "%s %d ", - edac_layer_name[mci->layers[lay].type], - dimm->location[lay]); - printk(KERN_CONT "\n"); - } -#endif err = edac_create_dimm_object(mci, dimm, i); - if (err) { - edac_dbg(1, "failure: create dimm %d obj\n", i); + if (err) goto fail_unregister_dimm; - } } #ifdef CONFIG_EDAC_LEGACY_SYSFS @@ -987,7 +982,6 @@ fail_unregister_dimm: } device_unregister(&mci->dev); -out: return err; } @@ -1011,14 +1005,14 @@ void edac_remove_sysfs_mci_device(struct 
mem_ctl_info *mci) struct dimm_info *dimm = mci->dimms[i]; if (dimm->nr_pages == 0) continue; - edac_dbg(0, "removing device %s\n", dev_name(&dimm->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev)); device_unregister(&dimm->dev); } } void edac_unregister_sysfs(struct mem_ctl_info *mci) { - edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); + edac_dbg(1, "unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); } @@ -1029,7 +1023,7 @@ static void mc_attr_release(struct device *dev) * parent device, used to create the /sys/devices/mc sysfs node. * So, there are no attributes on it. */ - edac_dbg(1, "Releasing device %s\n", dev_name(dev)); + edac_dbg(1, "device %s released\n", dev_name(dev)); kfree(dev); } @@ -1044,10 +1038,8 @@ int __init edac_mc_sysfs_init(void) int err; mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); - if (!mci_pdev) { - err = -ENOMEM; - goto out; - } + if (!mci_pdev) + return -ENOMEM; mci_pdev->bus = edac_get_sysfs_subsys(); mci_pdev->type = &mc_attr_type; @@ -1055,17 +1047,15 @@ int __init edac_mc_sysfs_init(void) dev_set_name(mci_pdev, "mc"); err = device_add(mci_pdev); - if (err < 0) - goto out_put_device; + if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev)); + put_device(mci_pdev); + return err; + } edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); return 0; - - out_put_device: - put_device(mci_pdev); - out: - return err; } void edac_mc_sysfs_exit(void) diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 7f19f1c672c3..d413a0bdc9ad 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -68,7 +68,7 @@ struct memdev_dmi_entry { struct ghes_edac_dimm_fill { struct mem_ctl_info *mci; - unsigned count; + unsigned int count; }; static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index b506eef6b146..251f2b692785 100644 --- 
a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -417,7 +417,8 @@ static const char *i5100_err_msg(unsigned err) } /* convert csrow index into a rank (per channel -- 0..5) */ -static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -425,7 +426,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow) } /* convert csrow index into a channel (0..1) */ -static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow) +static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci, + unsigned int csrow) { const struct i5100_priv *priv = mci->pvt_info; @@ -653,11 +655,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor, return ret; } -static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow) +static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow) { struct i5100_priv *priv = mci->pvt_info; - const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow); - const unsigned chan = i5100_csrow_to_chan(mci, csrow); + const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow); + const unsigned int chan = i5100_csrow_to_chan(mci, csrow); unsigned addr_lines; /* dimm present? 
*/ @@ -852,8 +854,8 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) for (i = 0; i < mci->tot_dimms; i++) { struct dimm_info *dimm; const unsigned long npages = i5100_npages(mci, i); - const unsigned chan = i5100_csrow_to_chan(mci, i); - const unsigned rank = i5100_csrow_to_rank(mci, i); + const unsigned int chan = i5100_csrow_to_chan(mci, i); + const unsigned int rank = i5100_csrow_to_rank(mci, i); if (!npages) continue; diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index ca25f8fe57ef..1ad538baaa4a 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -260,11 +260,14 @@ static u64 get_sideband_reg_base_addr(void) } } +#define DNV_MCHBAR_SIZE 0x8000 +#define DNV_SB_PORT_SIZE 0x10000 static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) { struct pci_dev *pdev; char *base; u64 addr; + unsigned long size; if (op == 4) { pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); @@ -279,15 +282,17 @@ static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *na addr = get_mem_ctrl_hub_base_addr(); if (!addr) return -ENODEV; + size = DNV_MCHBAR_SIZE; } else { /* MMIO via sideband register base address */ addr = get_sideband_reg_base_addr(); if (!addr) return -ENODEV; addr += (port << 16); + size = DNV_SB_PORT_SIZE; } - base = ioremap((resource_size_t)addr, 0x10000); + base = ioremap((resource_size_t)addr, size); if (!base) return -ENODEV; |