From 8960de4a5ca7980ed1e19e7ca5a774d3b7a55c38 Mon Sep 17 00:00:00 2001 From: Michael Jin Date: Thu, 16 Aug 2018 15:28:40 -0400 Subject: EDAC, amd64: Add Family 17h, models 10h-2fh support Add new device IDs for family 17h, models 10h-2fh. This is required by amd64_edac_mod in order to properly detect PCI device functions 0 and 6. Signed-off-by: Michael Jin Reviewed-by: Yazen Ghannam Cc: Link: http://lkml.kernel.org/r/20180816192840.31166-1-mikhail.jin@gmail.com Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 14 ++++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 18aeabb1d5ee..e2addb2bca29 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2200,6 +2200,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_base_addr_to_cs_size, } }, + [F17_M10H_CPUS] = { + .ctl_name = "F17h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_base_addr_to_cs_size, + } + }, }; /* @@ -3188,6 +3197,11 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x17: + if (pvt->model >= 0x10 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M10H_CPUS]; + pvt->ops = &family_types[F17_M10H_CPUS].ops; + break; + } fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; break; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1d4b74e9a037..4242f8e39c18 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -115,6 +115,8 @@ #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F2 0x1582 #define PCI_DEVICE_ID_AMD_17H_DF_F0 0x1460 #define PCI_DEVICE_ID_AMD_17H_DF_F6 0x1466 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F0 0x15e8 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee /* * Function 1 - Address Map @@ -281,6 +283,7 @@ enum amd_families { F16_CPUS, F16_M30H_CPUS, F17_CPUS, + F17_M10H_CPUS, NUM_FAMILIES, }; -- cgit v1.2.3 From bd1852317ffea39ed970f942259be82713bbf8d4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 Aug 2018 11:23:41 +0300 Subject: EDAC: Get rid of custom ICPU() macro Replace custom grown macro with generic INTEL_CPU_FAM6() one. No functional change intended. Signed-off-by: Andy Shevchenko Cc: linux-edac Link: http://lkml.kernel.org/r/20180831082341.72363-1-andriy.shevchenko@linux.intel.com Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 07726fb00321..6f3b763af7cd 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3320,17 +3320,14 @@ fail0: return rc; } -#define ICPU(model, table) \ - { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } - static const struct x86_cpu_id sbridge_cpuids[] = { - ICPU(INTEL_FAM6_SANDYBRIDGE_X, pci_dev_descr_sbridge_table), - ICPU(INTEL_FAM6_IVYBRIDGE_X, pci_dev_descr_ibridge_table), - ICPU(INTEL_FAM6_HASWELL_X, pci_dev_descr_haswell_table), - ICPU(INTEL_FAM6_BROADWELL_X, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, pci_dev_descr_broadwell_table), - ICPU(INTEL_FAM6_XEON_PHI_KNL, pci_dev_descr_knl_table), - ICPU(INTEL_FAM6_XEON_PHI_KNM, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(SANDYBRIDGE_X, pci_dev_descr_sbridge_table), + INTEL_CPU_FAM6(IVYBRIDGE_X, pci_dev_descr_ibridge_table), + INTEL_CPU_FAM6(HASWELL_X, pci_dev_descr_haswell_table), + INTEL_CPU_FAM6(BROADWELL_X, pci_dev_descr_broadwell_table), + INTEL_CPU_FAM6(BROADWELL_XEON_D, pci_dev_descr_broadwell_table), + INTEL_CPU_FAM6(XEON_PHI_KNL, pci_dev_descr_knl_table), + INTEL_CPU_FAM6(XEON_PHI_KNM, pci_dev_descr_knl_table), { } }; MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); -- cgit v1.2.3 From dcc960b225ceb2bd66c45e0845d03e577f7010f9 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 7 Sep 2018 16:08:27 -0700 Subject: EDAC, sb_edac: Return early on ADDRV bit and address type test Users of the mce_register_decode_chain() are called for every logged error. EDAC drivers should check: 1) Is this a memory error? [bit 7 in status register] 2) Is there a valid address? [bit 58 in status register] 3) Is the address a system address? [bitfield 8:6 in misc register] The sb_edac driver performed test "1" twice. Waited far too long to perform check "2". Didn't do check "3" at all. Fix it by moving the test for valid address from sbridge_mce_output_error() into sbridge_mce_check_error() and add a test for the type immediately after. Delete the redundant check for the type of the error from sbridge_mce_output_error(). Signed-off-by: Qiuxu Zhuo Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180907230828.13901-2-tony.luck@intel.com [ Re-word commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 68 ++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 6f3b763af7cd..ee2e2e53935f 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2911,35 +2911,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! ((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; } - /* Only decode errors with an valid address (ADDRV) */ - if (!GET_BITFIELD(m->status, 58, 58)) - return; - if (pvt->info.type == KNIGHTS_LANDING) { if (channel == 14) { edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", @@ -3045,17 +3037,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; char *type; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; - mci = get_mci_for_node_id(mce->socketid, IMC0); - if (!mci) - return NOTIFY_DONE; - pvt = mci->pvt_info; - /* * Just let mcelog handle it if the error is * outside the memory controller. A memory error @@ -3065,6 +3051,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; + + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; + + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) + return NOTIFY_DONE; + + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) + return NOTIFY_DONE; + if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; else -- cgit v1.2.3 From 8489b17ce29d9a35a36c08bbea93cdce4c98a6ad Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Sep 2018 14:11:45 -0700 Subject: EDAC, sb_edac: Fix reporting for patrol scrubber errors sb_edac sometimes reports the wrong DIMM for a memory error found by the patrol scrubber. That is because the hardware provides only a 4KB page-aligned address for the error case. This means that the EDAC driver will point at the DIMM matching offset 0x0 in the 4KB page, but because of interleaving across channels and ranks, the actual DIMM involved may be different if the error is on some other cache line within the page. Therefore, reconstruct the socket/iMC/channel information from the "mce" structure passed to the EDAC driver. The DIMM cannot be determined, so pass "dimm=-1" to the EDAC core. It will report that all the DIMMs on that channel may be affected. Signed-off-by: Qiuxu Zhuo Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180907230828.13901-3-tony.luck@intel.com [ Improve comments on the functions to convert bank number to memory controller number. Minor cleanup to commit message. ] Signed-off-by: Tony Luck [ Massage commit message more. ] Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index ee2e2e53935f..d4a10793f807 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -326,6 +326,7 @@ struct sbridge_info { const struct interleave_pkg *interleave_pkg; u8 max_sad; u8 (*get_node_id)(struct sbridge_pvt *pvt); + u8 (*get_ha)(u8 bank); enum mem_type (*get_memory_type)(struct sbridge_pvt *pvt); enum dev_type (*get_width)(struct sbridge_pvt *pvt, u32 mtr); struct pci_dev *pci_vtd; @@ -1002,6 +1003,39 @@ static u8 knl_get_node_id(struct sbridge_pvt *pvt) return GET_BITFIELD(reg, 0, 2); } +/* + * Use the reporting bank number to determine which memory + * controller (also known as "ha" for "home agent"). Sandy + * Bridge only has one memory controller per socket, so the + * answer is always zero. + */ +static u8 sbridge_get_ha(u8 bank) +{ + return 0; +} + +/* + * On Ivy Bridge, Haswell and Broadwell the error may be in a + * home agent bank (7, 8), or one of the per-channel memory + * controller banks (9 .. 16). + */ +static u8 ibridge_get_ha(u8 bank) +{ + switch (bank) { + case 7 ... 8: + return bank - 7; + case 9 ... 16: + return (bank - 9) / 4; + default: + return -EINVAL; + } +} + +/* Not used, but included for safety/symmetry */ +static u8 knl_get_ha(u8 bank) +{ + return -EINVAL; +} static u64 haswell_get_tolm(struct sbridge_pvt *pvt) { @@ -2207,6 +2241,60 @@ static int get_memory_error_data(struct mem_ctl_info *mci, return 0; } +static int get_memory_error_data_from_mce(struct mem_ctl_info *mci, + const struct mce *m, u8 *socket, + u8 *ha, long *channel_mask, + char *msg) +{ + u32 reg, channel = GET_BITFIELD(m->status, 0, 3); + struct mem_ctl_info *new_mci; + struct sbridge_pvt *pvt; + struct pci_dev *pci_ha; + bool tad0; + + if (channel >= NUM_CHANNELS) { + sprintf(msg, "Invalid channel 0x%x", channel); + return -EINVAL; + } + + pvt = mci->pvt_info; + if (!pvt->info.get_ha) { + sprintf(msg, "No get_ha()"); + return -EINVAL; + } + *ha = pvt->info.get_ha(m->bank); + if (*ha != 0 && *ha != 1) { + sprintf(msg, "Impossible bank %d", m->bank); + return -EINVAL; + } + + *socket = m->socketid; + new_mci = get_mci_for_node_id(*socket, *ha); + if (!new_mci) { + strcpy(msg, "mci socket got corrupted!"); + return -EINVAL; + } + + pvt = new_mci->pvt_info; + pci_ha = pvt->pci_ha; + pci_read_config_dword(pci_ha, tad_dram_rule[0], ®); + tad0 = m->addr <= TAD_LIMIT(reg); + + *channel_mask = 1 << channel; + if (pvt->mirror_mode == FULL_MIRRORING || + (pvt->mirror_mode == ADDR_RANGE_MIRRORING && tad0)) { + *channel_mask |= 1 << ((channel + 2) % 4); + pvt->is_cur_addr_mirrored = true; + } else { + pvt->is_cur_addr_mirrored = false; + } + + if (pvt->is_lockstep) + *channel_mask |= 1 << ((channel + 1) % 4); + + return 0; +} + /**************************************************************************** Device initialization routines: put/get, init/exit ****************************************************************************/ @@ -2877,10 +2965,16 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 errcode = GET_BITFIELD(m->status, 0, 15); u32 channel = GET_BITFIELD(m->status, 0, 3); u32 optypenum = GET_BITFIELD(m->status, 4, 6); + /* + * Bits 5-0 of MCi_MISC give the least significant bit that is valid. + * A value 6 is for cache line aligned address, a value 12 is for page + * aligned address reported by patrol scrubber. + */ + u32 lsb = GET_BITFIELD(m->misc, 0, 5); long channel_mask, first_channel; - u8 rank, socket, ha; + u8 rank = 0xff, socket, ha; int rc, dimm; - char *area_type = NULL; + char *area_type = "DRAM"; if (pvt->info.type != SANDY_BRIDGE) recoverable = true; @@ -2964,9 +3058,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, optype, msg); } return; - } else { + } else if (lsb < 12) { rc = get_memory_error_data(mci, m->addr, &socket, &ha, - &channel_mask, &rank, &area_type, msg); + &channel_mask, &rank, + &area_type, msg); + } else { + rc = get_memory_error_data_from_mce(mci, m, &socket, &ha, + &channel_mask, msg); } if (rc < 0) @@ -2981,14 +3079,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, first_channel = find_first_bit(&channel_mask, NUM_CHANNELS); - if (rank < 4) + if (rank == 0xff) + dimm = -1; + else if (rank < 4) dimm = 0; else if (rank < 8) dimm = 1; else dimm = 2; - /* * FIXME: On some memory configurations (mirror, lockstep), the * Memory Controller can't point the error to a single DIMM. The @@ -3175,6 +3274,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3199,6 +3299,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = sbridge_dram_rule; pvt->info.get_memory_type = get_memory_type; pvt->info.get_node_id = get_node_id; + pvt->info.get_ha = sbridge_get_ha; pvt->info.rir_limit = rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3223,6 +3324,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3247,6 +3349,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = ibridge_dram_rule; pvt->info.get_memory_type = haswell_get_memory_type; pvt->info.get_node_id = haswell_get_node_id; + pvt->info.get_ha = ibridge_get_ha; pvt->info.rir_limit = haswell_rir_limit; pvt->info.sad_limit = sad_limit; pvt->info.interleave_mode = interleave_mode; @@ -3271,6 +3374,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type) pvt->info.dram_rule = knl_dram_rule; pvt->info.get_memory_type = knl_get_memory_type; pvt->info.get_node_id = knl_get_node_id; + pvt->info.get_ha = knl_get_ha; pvt->info.rir_limit = NULL; pvt->info.sad_limit = knl_sad_limit; pvt->info.interleave_mode = knl_interleave_mode; -- cgit v1.2.3 From c968ed08594dadb788a93dc2bc128ed4ef35c93e Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Fri, 14 Sep 2018 13:19:05 -0700 Subject: EDAC, sb_edac: Fix signedness bugs in *_get_ha() functions A static checker gave the following warnings: drivers/edac/sb_edac.c:1030 ibridge_get_ha() warn: signedness bug returning '(-22)' drivers/edac/sb_edac.c:1037 knl_get_ha() warn: signedness bug returning '(-22)' Both because the functions are declared to return a "u8", but try to return -EINVAL for the error case. Fix by returning 0xff (since the caller doesn't look at, or pass on, the return value). Reported-by: Dan Carpenter Signed-off-by: Tony Luck Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180914201905.GA30946@agluck-desk Signed-off-by: Borislav Petkov --- drivers/edac/sb_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d4a10793f807..5aeb9a1ee898 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1027,14 +1027,14 @@ static u8 ibridge_get_ha(u8 bank) case 9 ... 16: return (bank - 9) / 4; default: - return -EINVAL; + return 0xff; } } /* Not used, but included for safety/symmetry */ static u8 knl_get_ha(u8 bank) { - return -EINVAL; + return 0xff; } static u64 haswell_get_tolm(struct sbridge_pvt *pvt) -- cgit v1.2.3 From 6f6da136046294a1e8d2944336eb97412751f653 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 18 Sep 2018 17:34:33 -0700 Subject: EDAC: Correct DIMM capacity unit symbol The {i3200|i7core|sb|skx}_edac drivers show DIMM capacity using the wrong unit symbol: 'Mb' - megabit. Fix them by replacing 'Mb' with 'MiB' - mebibyte. [Tony: These are all "edac_dbg()" messages, so this won't break scripts that parse console logs.] Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20180919003433.16475-1-tony.luck@intel.com --- drivers/edac/i3200_edac.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/sb_edac.c | 2 +- drivers/edac/skx_edac.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index d92d56cee101..299b441647cd 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -399,7 +399,7 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) if (nr_pages == 0) continue; - edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j, stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 8e120bf60624..bd65f573e381 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -597,7 +597,7 @@ static int get_dimm_config(struct mem_ctl_info *mci) /* DDR3 has 8 I/O banks */ size = (rows * cols * banks * ranks) >> (20 - 3); - edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "\tdimm %d %d MiB offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n", j, size, RANKOFFSET(dimm_dod[j]), banks, ranks, rows, cols); diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 5aeb9a1ee898..cacd0ca12af6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1656,7 +1656,7 @@ static int __populate_dimms(struct mem_ctl_info *mci, size = ((u64)rows * cols * banks * ranks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: ha %d channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", pvt->sbridge_dev->mc, pvt->sbridge_dev->dom, i, j, size, npages, banks, ranks, rows, cols); diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index fae095162c01..b2987efa2c6d 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -364,7 +364,7 @@ static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); npages = MiB_TO_PAGES(size); - edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", imc->mc, chan, dimmno, size, npages, banks, 1 << ranks, rows, cols); @@ -424,7 +424,7 @@ unknown_size: dimm->mtype = MEM_NVDIMM; dimm->edac_mode = EDAC_SECDED; /* likely better than this */ - edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu Mb (%u pages)\n", + edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n", imc->mc, chan, dimmno, size >> 20, dimm->nr_pages); snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", -- cgit v1.2.3 From c798c88f3962ddff89c7aa818986caeecd46ab4c Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Wed, 19 Sep 2018 01:59:00 +0000 Subject: EDAC, ghes: Use CPER module handles to locate DIMMs Use SMBIOS module handle type 17, on platforms which provide valid ones, to locate the corresponding DIMM and thus have per-DIMM error counter updates. Signed-off-by: Fan Wu [ Massage commit message. ] Signed-off-by: Borislav Petkov Reviewed-by: Tyler Baicar Reviewed-by: James Morse Tested-by: Toshi Kani Cc: Mauro Carvalho Chehab Cc: baicar.tyler@gmail.com Cc: john.garry@huawei.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac Cc: shiju.jose@huawei.com Cc: tanxiaofei@huawei.com Cc: wanghuiqiang@huawei.com Link: http://lkml.kernel.org/r/1537322340-1860-1-git-send-email-wufan@codeaurora.org --- drivers/edac/ghes_edac.c | 23 +++++++++++++++++++++++ include/linux/edac.h | 2 ++ 2 files changed, 25 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 473aeec4b1da..49396bf6ad88 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) (*num_dimm)++; } +static int get_dimm_smbios_index(u16 handle) +{ + struct mem_ctl_info *mci = ghes_pvt->mci; + int i; + + for (i = 0; i < mci->tot_dimms; i++) { + if (mci->dimms[i]->smbios_handle == handle) + return i; + } + return -1; +} + static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) { struct ghes_edac_dimm_fill *dimm_fill = arg; @@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) entry->total_width, entry->data_width); } + dimm->smbios_handle = entry->handle; + dimm_fill->count++; } } @@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { const char *bank = NULL, *device = NULL; + int index = -1; + dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device); if (bank != NULL && device != NULL) p += sprintf(p, "DIMM location:%s %s ", bank, device); else p += sprintf(p, "DIMM DMI handle: 0x%.4x ", mem_err->mem_dev_handle); + + index = get_dimm_smbios_index(mem_err->mem_dev_handle); + if (index >= 0) { + e->top_layer = index; + e->enable_per_layer_report = true; + } + } if (p > e->location) *(p - 1) = '\0'; diff --git a/include/linux/edac.h b/include/linux/edac.h index bffb97828ed6..a45ce1f84bfc 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -451,6 +451,8 @@ struct dimm_info { u32 nr_pages; /* number of pages on this dimm */ unsigned csrow, cschannel; /* Points to the old API data */ + + u16 smbios_handle; /* Handle for SMBIOS type 17 */ }; /** -- cgit v1.2.3 From 0751736905710e20130e70f33cb40014af0e922e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 25 Sep 2018 02:04:03 +0000 Subject: EDAC, i7core: Remove set but not used variable pvt Remove the unused local variable pvt: drivers/edac/i7core_edac.c: In function 'i7core_mce_check_error': drivers/edac/i7core_edac.c:1818:21: warning: variable 'pvt' set but not used \ [-Wunused-but-set-variable] Signed-off-by: YueHaibing Signed-off-by: Borislav Petkov Cc: Mauro Carvalho Chehab Cc: linux-edac Link: http://lkml.kernel.org/r/1537841043-108267-1-git-send-email-yuehaibing@huawei.com --- drivers/edac/i7core_edac.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index bd65f573e381..943aab62dea1 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1815,14 +1815,12 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; struct i7core_dev *i7_dev; struct mem_ctl_info *mci; - struct i7core_pvt *pvt; i7_dev = get_i7core_dev(mce->socketid); if (!i7_dev) return NOTIFY_DONE; mci = i7_dev->mci; - pvt = mci->pvt_info; /* * Just let mcelog handle it if the error is -- cgit v1.2.3 From d5fc9125566c9190ee9c7900859f5538a62e166a Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 08:48:58 -0500 Subject: EDAC, altera: Combine Stratix10 and Arria10 probe functions On Stratix10, the ECC offsets are similar to the existing Arria10 functions and this can be leveraged to simplify the EDAC driver as follows: 1. Fold Stratix10 specifics into Arria10 structures and functions. 2. Implement the Stratix10 System Manager register accesses using a custom regmap to allow use with the Arria10 System Manager regmaps. 3. Stratix10 double bit errors are implemented as SError instead of interrupts so use a panic notifier. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: dinguyen@kernel.org Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Cc: devicetree@vger.kernel.org Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/1537883342-30180-3-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 263 +++++++++++++++------------------------------ drivers/edac/altera_edac.h | 10 +- 2 files changed, 90 insertions(+), 183 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 5762c3c383f2..e2b66a20d064 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -2146,6 +2146,35 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = { .xlate = irq_domain_xlate_twocell, }; +/************** Stratix 10 EDAC Double Bit Error Handler ************/ +#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m) + +/* + * The double bit error is handled through SError which is fatal. This is + * called as a panic notifier to printout ECC error info as part of the panic. + */ +static int s10_edac_dberr_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct altr_arria10_edac *edac = to_a10edac(this, panic_notifier); + int err_addr, dberror; + + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, + &dberror); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); + if (dberror & S10_DDR0_IRQ_MASK) { + regmap_read(edac->ecc_mgr_map, S10_DERRADDR_OFST, &err_addr); + regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + err_addr); + edac_printk(KERN_ERR, EDAC_MC, + "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", + err_addr); + } + + return NOTIFY_DONE; +} + +/****************** Arria 10 EDAC Probe Function *********************/ static int altr_edac_a10_probe(struct platform_device *pdev) { struct altr_arria10_edac *edac; @@ -2159,8 +2188,33 @@ static int altr_edac_a10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, edac); INIT_LIST_HEAD(&edac->a10_ecc_devices); - edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + if (socfpga_is_a10()) { + edac->ecc_mgr_map = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "altr,sysmgr-syscon"); + } else { + struct device_node *sysmgr_np; + struct resource res; + void __iomem *base; + + sysmgr_np = of_parse_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = (void __iomem *)res.start; + + edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, base, + &s10_sdram_regmap_cfg); + } + if (IS_ERR(edac->ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, "Unable to get syscon altr,sysmgr-syscon\n"); @@ -2187,14 +2241,38 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_irq_handler, edac); - edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); - return edac->db_irq; + if (socfpga_is_a10()) { + edac->db_irq = platform_get_irq(pdev, 1); + if (edac->db_irq < 0) { + dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + return edac->db_irq; + } + irq_set_chained_handler_and_data(edac->db_irq, + altr_edac_a10_irq_handler, + edac); + } else { + int dberror, err_addr; + + edac->panic_notifier.notifier_call = s10_edac_dberr_handler; + atomic_notifier_chain_register(&panic_notifier_list, + &edac->panic_notifier); + + /* Printout a message if uncorrectable error previously. */ + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, + &dberror); + if (dberror) { + regmap_read(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, + &err_addr); + edac_printk(KERN_ERR, EDAC_DEVICE, + "Previous Boot UE detected[0x%X] @ 0x%X\n", + dberror, err_addr); + /* Reset the sticky registers */ + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_VAL_OFST, 0); + regmap_write(edac->ecc_mgr_map, + S10_SYSMGR_UE_ADDR_OFST, 0); + } } - irq_set_chained_handler_and_data(edac->db_irq, - altr_edac_a10_irq_handler, - edac); for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) @@ -2211,7 +2289,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_device_add(edac, child); - else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) + else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || + (of_device_is_compatible(child, "altr,sdram-edac-s10"))) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); @@ -2222,6 +2301,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev) static const struct of_device_id altr_edac_a10_of_match[] = { { .compatible = "altr,socfpga-a10-ecc-manager" }, + { .compatible = "altr,socfpga-s10-ecc-manager" }, {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match); @@ -2235,171 +2315,6 @@ static struct platform_driver altr_edac_a10_driver = { }; module_platform_driver(altr_edac_a10_driver); -/************** Stratix 10 EDAC Device Controller Functions> ************/ - -#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m) - -/* - * The double bit error is handled through SError which is fatal. This is - * called as a panic notifier to printout ECC error info as part of the panic. - */ -static int s10_edac_dberr_handler(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier); - int err_addr, dberror; - - s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST, - &dberror); - /* Remember the UE Errors for a reboot */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror); - if (dberror & S10_DDR0_IRQ_MASK) { - s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr); - /* Remember the UE Error address */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, - err_addr); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC: [Uncorrectable errors @ 0x%08X]\n\n", - err_addr); - } - - return NOTIFY_DONE; -} - -static void altr_edac_s10_irq_handler(struct irq_desc *desc) -{ - struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_chip(desc); - int irq = irq_desc_get_irq(desc); - int bit, sm_offset, irq_status; - - sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST; - - chained_irq_enter(chip, desc); - - s10_protected_reg_read(NULL, sm_offset, &irq_status); - - for_each_set_bit(bit, (unsigned long *)&irq_status, 32) { - irq = irq_linear_revmap(edac->domain, bit); - if (irq) - generic_handle_irq(irq); - } - - chained_irq_exit(chip, desc); -} - -static void s10_eccmgr_irq_mask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_SET_OFST, - BIT(d->hwirq)); -} - -static void s10_eccmgr_irq_unmask(struct irq_data *d) -{ - struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d); - - s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - BIT(d->hwirq)); -} - -static int s10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq) -{ - struct altr_stratix10_edac *edac = d->host_data; - - irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq); - irq_set_chip_data(irq, edac); - irq_set_noprobe(irq); - - return 0; -} - -static const struct irq_domain_ops s10_eccmgr_ic_ops = { - .map = s10_eccmgr_irqdomain_map, - .xlate = irq_domain_xlate_twocell, -}; - -static int altr_edac_s10_probe(struct platform_device *pdev) -{ - struct altr_stratix10_edac *edac; - struct device_node *child; - int dberror, err_addr; - - edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); - if (!edac) - return -ENOMEM; - - edac->dev = &pdev->dev; - platform_set_drvdata(pdev, edac); - INIT_LIST_HEAD(&edac->s10_ecc_devices); - - edac->irq_chip.name = pdev->dev.of_node->name; - edac->irq_chip.irq_mask = s10_eccmgr_irq_mask; - edac->irq_chip.irq_unmask = s10_eccmgr_irq_unmask; - edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64, - &s10_eccmgr_ic_ops, edac); - if (!edac->domain) { - dev_err(&pdev->dev, "Error adding IRQ domain\n"); - return -ENOMEM; - } - - edac->sb_irq = platform_get_irq(pdev, 0); - if (edac->sb_irq < 0) { - dev_err(&pdev->dev, "No SBERR IRQ resource\n"); - return edac->sb_irq; - } - - irq_set_chained_handler_and_data(edac->sb_irq, - altr_edac_s10_irq_handler, - edac); - - edac->panic_notifier.notifier_call = s10_edac_dberr_handler; - atomic_notifier_chain_register(&panic_notifier_list, - &edac->panic_notifier); - - /* Printout a message if uncorrectable error previously. */ - s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror); - if (dberror) { - s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST, - &err_addr); - edac_printk(KERN_ERR, EDAC_DEVICE, - "Previous Boot UE detected[0x%X] @ 0x%X\n", - dberror, err_addr); - /* Reset the sticky registers */ - s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0); - s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0); - } - - for_each_child_of_node(pdev->dev.of_node, child) { - if (!of_device_is_available(child)) - continue; - - if (of_device_is_compatible(child, "altr,sdram-edac-s10")) - of_platform_populate(pdev->dev.of_node, - altr_sdram_ctrl_of_match, - NULL, &pdev->dev); - } - - return 0; -} - -static const struct of_device_id altr_edac_s10_of_match[] = { - { .compatible = "altr,socfpga-s10-ecc-manager" }, - {}, -}; -MODULE_DEVICE_TABLE(of, altr_edac_s10_of_match); - -static struct platform_driver altr_edac_s10_driver = { - .probe = altr_edac_s10_probe, - .driver = { - .name = "socfpga_s10_ecc_manager", - .of_match_table = altr_edac_s10_of_match, - }, -}; -module_platform_driver(altr_edac_s10_driver); - MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 81f0554e09de..d925c4cee01b 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -370,6 +370,7 @@ struct altr_arria10_edac { struct irq_domain *domain; struct irq_chip irq_chip; struct list_head a10_ecc_devices; + struct notifier_block panic_notifier; }; /* @@ -437,13 +438,4 @@ struct altr_arria10_edac { #define INTEL_SIP_SMC_REG_WRITE \ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE) -struct altr_stratix10_edac { - struct device *dev; - int sb_irq; - struct irq_domain *domain; - struct irq_chip irq_chip; - struct list_head s10_ecc_devices; - struct notifier_block panic_notifier; -}; - #endif /* #ifndef _ALTERA_EDAC_H */ -- cgit v1.2.3 From 08f08bfb7b4cc7b7037c8818acb1b6abccf2bd0e Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 08:49:00 -0500 Subject: EDAC, altera: Merge Stratix10 into the Arria10 SDRAM probe routine Change Stratix10 regmap to use offsets from a base to match the Arria10 regmap and allow re-use of the Arria10 functions. Only the regmap initialization differs (Arria10 mmio_regmap vs Stratix10 custom regmap). Modify the SDRAM probe function to handle Stratix10. Remove the Stratix10 offset defines if Arria10 can be used. Remove the unused Stratix10 probe function. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: dinguyen@kernel.org Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Cc: devicetree@vger.kernel.org Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/1537883342-30180-5-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 276 ++------------------------------------------- drivers/edac/altera_edac.h | 38 +------ 2 files changed, 14 insertions(+), 300 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e2b66a20d064..48b58063a7a3 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -69,25 +69,6 @@ static const struct altr_sdram_prv_data a10_data = { .ue_set_mask = A10_DIAGINT_TDERRA_MASK, }; -static const struct altr_sdram_prv_data s10_data = { - .ecc_ctrl_offset = S10_ECCCTRL1_OFST, - .ecc_ctl_en_mask = A10_ECCCTRL1_ECC_EN, - .ecc_stat_offset = S10_INTSTAT_OFST, - .ecc_stat_ce_mask = A10_INTSTAT_SBEERR, - .ecc_stat_ue_mask = A10_INTSTAT_DBEERR, - .ecc_saddr_offset = S10_SERRADDR_OFST, - .ecc_daddr_offset = S10_DERRADDR_OFST, - .ecc_irq_en_offset = S10_ERRINTEN_OFST, - .ecc_irq_en_mask = A10_ECC_IRQ_EN_MASK, - .ecc_irq_clr_offset = S10_INTSTAT_OFST, - .ecc_irq_clr_mask = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR), - .ecc_cnt_rst_offset = S10_ECCCTRL1_OFST, - .ecc_cnt_rst_mask = A10_ECC_CNT_RESET_MASK, - .ce_ue_trgr_offset = S10_DIAGINTTEST_OFST, - .ce_set_mask = A10_DIAGINT_TSERRA_MASK, - .ue_set_mask = A10_DIAGINT_TDERRA_MASK, -}; - /*********************** EDAC Memory Controller Functions ****************/ /* The SDRAM controller uses the EDAC Memory Controller framework. */ @@ -239,7 +220,7 @@ static unsigned long get_total_mem(void) static const struct of_device_id altr_sdram_ctrl_of_match[] = { { .compatible = "altr,sdram-edac", .data = &c5_data}, { .compatible = "altr,sdram-edac-a10", .data = &a10_data}, - { .compatible = "altr,sdram-edac-s10", .data = &s10_data}, + { .compatible = "altr,sdram-edac-s10", .data = &a10_data}, {}, }; MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match); @@ -293,6 +274,7 @@ release: return ret; } +static int socfpga_is_a10(void); static int altr_sdram_probe(struct platform_device *pdev) { const struct of_device_id *id; @@ -416,7 +398,7 @@ static int altr_sdram_probe(struct platform_device *pdev) goto err; /* Only the Arria10 has separate IRQs */ - if (irq2 > 0) { + if (socfpga_is_a10()) { /* Arria10 specific initialization */ res = a10_init(mc_vbase); if (res < 0) @@ -502,8 +484,9 @@ static int s10_protected_reg_write(void *context, unsigned int reg, unsigned int val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, reg, val, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0, 0, 0, 0, &result); return (int)result.a0; @@ -523,8 +506,9 @@ static int s10_protected_reg_read(void *context, unsigned int reg, unsigned int *val) { struct arm_smccc_res result; + unsigned long offset = (unsigned long)context; - arm_smccc_smc(INTEL_SIP_SMC_REG_READ, reg, 0, 0, 0, + arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0, 0, 0, 0, &result); *val = (unsigned int)result.a1; @@ -532,245 +516,17 @@ static int s10_protected_reg_read(void *context, unsigned int reg, return (int)result.a0; } -static bool s10_sdram_writeable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_readable_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - -static bool s10_sdram_volatile_reg(struct device *dev, unsigned int reg) -{ - switch (reg) { - case S10_ECCCTRL1_OFST: - case S10_ERRINTEN_OFST: - case S10_INTMODE_OFST: - case S10_INTSTAT_OFST: - case S10_DERRADDR_OFST: - case S10_SERRADDR_OFST: - case S10_DIAGINTTEST_OFST: - case S10_SYSMGR_ECC_INTMASK_VAL_OFST: - case S10_SYSMGR_ECC_INTMASK_SET_OFST: - case S10_SYSMGR_ECC_INTMASK_CLR_OFST: - case S10_SYSMGR_ECC_INTSTAT_SERR_OFST: - case S10_SYSMGR_ECC_INTSTAT_DERR_OFST: - return true; - } - return false; -} - static const struct regmap_config s10_sdram_regmap_cfg = { .name = "s10_ddr", .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0xffffffff, - .writeable_reg = s10_sdram_writeable_reg, - .readable_reg = s10_sdram_readable_reg, - .volatile_reg = s10_sdram_volatile_reg, + .max_register = 0xffd12228, .reg_read = s10_protected_reg_read, .reg_write = s10_protected_reg_write, .use_single_rw = true, }; -static int altr_s10_sdram_probe(struct platform_device *pdev) -{ - const struct of_device_id *id; - struct edac_mc_layer layers[2]; - struct mem_ctl_info *mci; - struct altr_sdram_mc_data *drvdata; - const struct altr_sdram_prv_data *priv; - struct regmap *regmap; - struct dimm_info *dimm; - u32 read_reg; - int irq, ret = 0; - unsigned long mem_size; - - id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev); - if (!id) - return -ENODEV; - - /* Grab specific offsets and masks for Stratix10 */ - priv = of_match_node(altr_sdram_ctrl_of_match, - pdev->dev.of_node)->data; - - regmap = devm_regmap_init(&pdev->dev, NULL, (void *)priv, - &s10_sdram_regmap_cfg); - if (IS_ERR(regmap)) - return PTR_ERR(regmap); - - /* Validate the SDRAM controller has ECC enabled */ - if (regmap_read(regmap, priv->ecc_ctrl_offset, &read_reg) || - ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "No ECC/ECC disabled [0x%08X]\n", read_reg); - return -ENODEV; - } - - /* Grab memory size from device tree. */ - mem_size = get_total_mem(); - if (!mem_size) { - edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n"); - return -ENODEV; - } - - /* Ensure the SDRAM Interrupt is disabled */ - if (regmap_update_bits(regmap, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error disabling SDRAM ECC IRQ\n"); - return -ENODEV; - } - - /* Toggle to clear the SDRAM Error count */ - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, - priv->ecc_cnt_rst_mask)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset, - priv->ecc_cnt_rst_mask, 0)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - return -ENODEV; - } - - irq = platform_get_irq(pdev, 0); - if (irq < 0) { - edac_printk(KERN_ERR, EDAC_MC, - "No irq %d in DT\n", irq); - return -ENODEV; - } - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 1; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, - sizeof(struct altr_sdram_mc_data)); - if (!mci) - return -ENOMEM; - - mci->pdev = &pdev->dev; - drvdata = mci->pvt_info; - drvdata->mc_vbase = regmap; - drvdata->data = priv; - platform_set_drvdata(pdev, mci); - - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { - edac_printk(KERN_ERR, EDAC_MC, - "Unable to get managed device resource\n"); - ret = -ENOMEM; - goto free; - } - - mci->mtype_cap = MEM_FLAG_DDR3; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = dev_name(&pdev->dev); - mci->scrub_mode = SCRUB_SW_SRC; - mci->dev_name = dev_name(&pdev->dev); - - dimm = *mci->dimms; - dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1; - dimm->grain = 8; - dimm->dtype = DEV_X8; - dimm->mtype = MEM_DDR3; - dimm->edac_mode = EDAC_SECDED; - - ret = edac_mc_add_mc(mci); - if (ret < 0) - goto err; - - ret = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler, - IRQF_SHARED, dev_name(&pdev->dev), mci); - if (ret < 0) { - edac_mc_printk(mci, KERN_ERR, - "Unable to request irq %d\n", irq); - ret = -ENODEV; - goto err2; - } - - if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST, - S10_DDR0_IRQ_MASK)) { - edac_printk(KERN_ERR, EDAC_MC, - "Error clearing SDRAM ECC count\n"); - ret = -ENODEV; - goto err2; - } - - if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset, - priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) { - edac_mc_printk(mci, KERN_ERR, - "Error enabling SDRAM ECC IRQ\n"); - ret = -ENODEV; - goto err2; - } - - altr_sdr_mc_create_debugfs_nodes(mci); - - devres_close_group(&pdev->dev, NULL); - - return 0; - -err2: - edac_mc_del_mc(&pdev->dev); -err: - devres_release_group(&pdev->dev, NULL); -free: - edac_mc_free(mci); - edac_printk(KERN_ERR, EDAC_MC, - "EDAC Probe Failed; Error %d\n", ret); - - return ret; -} - -static int altr_s10_sdram_remove(struct platform_device *pdev) -{ - struct mem_ctl_info *mci = platform_get_drvdata(pdev); - - edac_mc_del_mc(&pdev->dev); - edac_mc_free(mci); - platform_set_drvdata(pdev, NULL); - - return 0; -} - /************** ***********/ /* @@ -804,20 +560,6 @@ static struct platform_driver altr_sdram_edac_driver = { module_platform_driver(altr_sdram_edac_driver); -static struct platform_driver altr_s10_sdram_edac_driver = { - .probe = altr_s10_sdram_probe, - .remove = altr_s10_sdram_remove, - .driver = { - .name = "altr_s10_sdram_edac", -#ifdef CONFIG_PM - .pm = &altr_sdram_pm_ops, -#endif - .of_match_table = altr_sdram_ctrl_of_match, - }, -}; - -module_platform_driver(altr_s10_sdram_edac_driver); - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -2163,7 +1905,7 @@ static int s10_edac_dberr_handler(struct notifier_block *this, &dberror); regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror); if (dberror & S10_DDR0_IRQ_MASK) { - regmap_read(edac->ecc_mgr_map, S10_DERRADDR_OFST, &err_addr); + regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr); regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST, err_addr); edac_printk(KERN_ERR, EDAC_MC, diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index d925c4cee01b..e9e3f9b61947 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -156,34 +156,6 @@ #define A10_INTMASK_CLR_OFST 0x10 #define A10_DDR0_IRQ_MASK BIT(17) -/************* Stratix10 Defines **************/ - -/* SDRAM Controller EccCtrl Register */ -#define S10_ECCCTRL1_OFST 0xF8011100 - -/* SDRAM Controller DRAM IRQ Register */ -#define S10_ERRINTEN_OFST 0xF8011110 - -/* SDRAM Interrupt Mode Register */ -#define S10_INTMODE_OFST 0xF801111C - -/* SDRAM Controller Error Status Register */ -#define S10_INTSTAT_OFST 0xF8011120 - -/* SDRAM Controller ECC Error Address Register */ -#define S10_DERRADDR_OFST 0xF801112C -#define S10_SERRADDR_OFST 0xF8011130 - -/* SDRAM Controller ECC Diagnostic Register */ -#define S10_DIAGINTTEST_OFST 0xF8011124 - -/* SDRAM Single Bit Error Count Compare Set Register */ -#define S10_SERRCNTREG_OFST 0xF801113C - -/* Sticky registers for Uncorrected Errors */ -#define S10_SYSMGR_UE_VAL_OFST 0xFFD12220 -#define S10_SYSMGR_UE_ADDR_OFST 0xFFD12224 - struct altr_sdram_prv_data { int ecc_ctrl_offset; int ecc_ctl_en_mask; @@ -319,12 +291,12 @@ struct altr_sdram_mc_data { /************* Stratix10 Defines **************/ /* Stratix10 ECC Manager Defines */ -#define S10_SYSMGR_ECC_INTMASK_VAL_OFST 0xFFD12090 -#define S10_SYSMGR_ECC_INTMASK_SET_OFST 0xFFD12094 -#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0xFFD12098 +#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98 +#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 -#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST 0xFFD1209C -#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xFFD120A0 +/* Sticky registers for Uncorrected Errors */ +#define S10_SYSMGR_UE_VAL_OFST 0x120 +#define S10_SYSMGR_UE_ADDR_OFST 0x124 #define S10_DDR0_IRQ_MASK BIT(16) -- cgit v1.2.3 From 064acbd4f4ab509dd3f31e1a2d1e04a43d5b1009 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Tue, 25 Sep 2018 08:49:01 -0500 Subject: EDAC, altera: Add Stratix10 peripheral support Add a new peripheral ECC error injection algorithm for Stratix10 and some Arria10 peripherals. Inject a single bit error and upon readback, it will be corrected and the SBE IRQ handler will be called. Add regmap selection for Stratix10 or Arria10 peripheral device memory initialization. Add checks for both Arria10 and Stratix10 to the peripheral ECC setup. Signed-off-by: Thor Thayer Signed-off-by: Borislav Petkov Cc: dinguyen@kernel.org Cc: robh+dt@kernel.org Cc: mark.rutland@arm.com Cc: mchehab@kernel.org Cc: devicetree@vger.kernel.org Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/1537883342-30180-6-git-send-email-thor.thayer@linux.intel.com --- drivers/edac/altera_edac.c | 129 +++++++++++++++++++++++++++++++++++++++++---- drivers/edac/altera_edac.h | 25 +++++++++ 2 files changed, 143 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 48b58063a7a3..523ae84a4dd8 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -713,6 +713,16 @@ static const struct file_operations altr_edac_a10_device_inject_fops = { .llseek = generic_file_llseek, }; +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos); + +static const struct file_operations altr_edac_a10_device_inject2_fops = { + .open = simple_open, + .write = altr_edac_a10_device_trig2, + .llseek = generic_file_llseek, +}; + static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, const struct edac_device_prv_data *priv) { @@ -994,6 +1004,16 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) return ret; } +static int socfpga_is_a10(void) +{ + return of_machine_is_compatible("altr,socfpga-arria10"); +} + +static int socfpga_is_s10(void) +{ + return of_machine_is_compatible("altr,socfpga-stratix10"); +} + static __init int __maybe_unused altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, u32 ecc_ctrl_en_mask, bool dual_port) @@ -1008,8 +1028,32 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, /* Get the ECC Manager - parent of the device EDACs */ np_eccmgr = of_get_parent(np); - ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, - "altr,sysmgr-syscon"); + + if (socfpga_is_a10()) { + ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); + } else { + struct device_node *sysmgr_np; + struct resource res; + void __iomem *base; + + sysmgr_np = of_parse_phandle(np_eccmgr, + "altr,sysmgr-syscon", 0); + if (!sysmgr_np) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "Unable to find altr,sysmgr-syscon\n"); + return -ENODEV; + } + + if (of_address_to_resource(sysmgr_np, 0, &res)) + return -ENOMEM; + + /* Need physical address for SMCC call */ + base = (void __iomem *)res.start; + + ecc_mgr_map = regmap_init(NULL, NULL, base, + &s10_sdram_regmap_cfg); + } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -1067,11 +1111,6 @@ out: return ret; } -static int socfpga_is_a10(void) -{ - return of_machine_is_compatible("altr,socfpga-arria10"); -} - static int validate_parent_available(struct device_node *np); static const struct of_device_id altr_edac_a10_device_of_match[]; static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) @@ -1079,7 +1118,7 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) int irq; struct device_node *child, *np; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; np = of_find_compatible_node(NULL, NULL, @@ -1325,7 +1364,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_ethernet_ecc(void) @@ -1403,7 +1442,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject_fops, + .inject_fops = &altr_edac_a10_device_inject2_fops, }; static int __init socfpga_init_usb_ecc(void) @@ -1601,7 +1640,7 @@ static int __init socfpga_init_sdmmc_ecc(void) int rc = -ENODEV; struct device_node *child; - if (!socfpga_is_a10()) + if (!socfpga_is_a10() && !socfpga_is_s10()) return -ENODEV; child = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); @@ -1685,6 +1724,74 @@ static ssize_t altr_edac_a10_device_trig(struct file *file, writel(priv->ue_set_mask, set_addr); else writel(priv->ce_set_mask, set_addr); + + /* Ensure the interrupt test bits are set */ + wmb(); + local_irq_restore(flags); + + return count; +} + +/* + * The Stratix10 EDAC Error Injection Functions differ from Arria10 + * slightly. A few Arria10 peripherals can use this injection function. + * Inject the error into the memory and then readback to trigger the IRQ. + */ +static ssize_t altr_edac_a10_device_trig2(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct edac_device_ctl_info *edac_dci = file->private_data; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void __iomem *set_addr = (drvdata->base + priv->set_err_ofst); + unsigned long flags; + u8 trig_type; + + if (!user_buf || get_user(trig_type, user_buf)) + return -EFAULT; + + local_irq_save(flags); + if (trig_type == ALTR_UE_TRIGGER_CHAR) { + writel(priv->ue_set_mask, set_addr); + } else { + /* Setup write of 0 to first 4 bytes */ + writel(0x0, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Setup write of 4 bytes */ + writel(ECC_WORD_WRITE, drvdata->base + ECC_BLK_DBYTECTRL_OFST); + /* Setup Address to 0 */ + writel(0x0, drvdata->base + ECC_BLK_ADDRESS_OFST); + /* Setup accctrl to write & data override */ + writel(ECC_WRITE_DOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc override */ + writel(ECC_READ_EOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup write for single bit change */ + writel(0x1, drvdata->base + ECC_BLK_WDATA0_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA1_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA2_OFST); + writel(0x0, drvdata->base + ECC_BLK_WDATA3_OFST); + /* Copy Read ECC to Write ECC */ + writel(readl(drvdata->base + ECC_BLK_RECC0_OFST), + drvdata->base + ECC_BLK_WECC0_OFST); + writel(readl(drvdata->base + ECC_BLK_RECC1_OFST), + drvdata->base + ECC_BLK_WECC1_OFST); + /* Setup accctrl to write & ecc override & data override */ + writel(ECC_WRITE_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + /* Setup accctrl to read & ecc overwrite & data overwrite */ + writel(ECC_READ_EDOVR, drvdata->base + ECC_BLK_ACCCTRL_OFST); + /* Kick it. */ + writel(ECC_XACT_KICK, drvdata->base + ECC_BLK_STARTACC_OFST); + } + /* Ensure the interrupt test bits are set */ wmb(); local_irq_restore(flags); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index e9e3f9b61947..4213cb0bb2a7 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -300,6 +300,31 @@ struct altr_sdram_mc_data { #define S10_DDR0_IRQ_MASK BIT(16) +/* Define ECC Block Offsets for peripherals */ +#define ECC_BLK_ADDRESS_OFST 0x40 +#define ECC_BLK_RDATA0_OFST 0x44 +#define ECC_BLK_RDATA1_OFST 0x48 +#define ECC_BLK_RDATA2_OFST 0x4C +#define ECC_BLK_RDATA3_OFST 0x50 +#define ECC_BLK_WDATA0_OFST 0x54 +#define ECC_BLK_WDATA1_OFST 0x58 +#define ECC_BLK_WDATA2_OFST 0x5C +#define ECC_BLK_WDATA3_OFST 0x60 +#define ECC_BLK_RECC0_OFST 0x64 +#define ECC_BLK_RECC1_OFST 0x68 +#define ECC_BLK_WECC0_OFST 0x6C +#define ECC_BLK_WECC1_OFST 0x70 +#define ECC_BLK_DBYTECTRL_OFST 0x74 +#define ECC_BLK_ACCCTRL_OFST 0x78 +#define ECC_BLK_STARTACC_OFST 0x7C + +#define ECC_XACT_KICK 0x10000 +#define ECC_WORD_WRITE 0xF +#define ECC_WRITE_DOVR 0x101 +#define ECC_WRITE_EDOVR 0x103 +#define ECC_READ_EOVR 0x2 +#define ECC_READ_EDOVR 0x3 + struct altr_edac_device_dev; struct edac_device_prv_data { -- cgit v1.2.3 From c4a3e94641449362ee970f521a2cdb0e8cd08690 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 27 Sep 2018 16:31:28 +0200 Subject: EDAC, amd64: Add Hygon Dhyana support Add support for Hygon Dhyana CPU to EDAC. Signed-off-by: Pu Wen Signed-off-by: Borislav Petkov Cc: mchehab@kernel.org Cc: tglx@linutronix.de Cc: mingo@redhat.com Cc: hpa@zytor.com Cc: thomas.lendacky@amd.com Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/9d71061301177822bc55b3bfd44f91057458d886.1537533369.git.puwen@hygon.cn --- drivers/edac/amd64_edac.c | 10 +++++++++- drivers/edac/mce_amd.c | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e2addb2bca29..6ea98575a402 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -211,7 +211,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) scrubval = scrubrates[i].scrubval; - if (pvt->fam == 0x17) { + if (pvt->fam == 0x17 || pvt->fam == 0x18) { __f17h_set_scrubval(pvt, scrubval); } else if (pvt->fam == 0x15 && pvt->model == 0x60) { f15h_select_dct(pvt, 0); @@ -264,6 +264,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) break; case 0x17: + case 0x18: amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); if (scrubval & BIT(0)) { amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); @@ -1044,6 +1045,7 @@ static void determine_memory_type(struct amd64_pvt *pvt) goto ddr3; case 0x17: + case 0x18: if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) pvt->dram_type = MEM_LRDDR4; else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) @@ -3202,8 +3204,13 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) pvt->ops = &family_types[F17_M10H_CPUS].ops; break; } + /* fall through */ + case 0x18: fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; + + if (pvt->fam == 0x18) + family_types[F17_CPUS].ctl_name = "F18h"; break; default: @@ -3442,6 +3449,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { } }; MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 2ab4d61ee47e..c605089d899f 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1059,7 +1059,8 @@ static int __init mce_amd_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_AMD) + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) return -ENODEV; fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); @@ -1113,6 +1114,7 @@ static int __init mce_amd_init(void) break; case 0x17: + case 0x18: xec_mask = 0x3f; if (!boot_cpu_has(X86_FEATURE_SMCA)) { printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n"); -- cgit v1.2.3 From 8537bf10976c56c6e0cc8b05a835e58cbbb1d287 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Sep 2018 12:09:26 +0200 Subject: EDAC, altera: Work around int-to-pointer-cast warnings The altera edac driver passes a token from a DT resource as resource_size_t into an SMC call, but casts it to an __iomem pointer and then a plain void pointer inbetween, mixing three or four incompatible types in the process. The compiler complains about one of the conversions: drivers/edac/altera_edac.c: In function 'altr_init_a10_ecc_block': drivers/edac/altera_edac.c:1053:10: error: cast to pointer from integer of \ different size [-Werror=int-to-pointer-cast] base = (void __iomem *)res.start; ^ drivers/edac/altera_edac.c: In function 'altr_edac_a10_probe': drivers/edac/altera_edac.c:2062:10: error: cast to pointer from integer of \ different size [-Werror=int-to-pointer-cast] base = (void __iomem *)res.start; Using a static checker probably also notices the __iomem cast. Solving this properly isn't trivial, but simply casting to a 'uintptr_t' instead of 'void __iomem *' makes it less wrong and should avoid the warnings. Fixes: d5fc9125566c ("EDAC, altera: Combine Stratix10 and Arria10 probe functions") Signed-off-by: Arnd Bergmann Signed-off-by: Borislav Petkov Reviewed-by: Thor Thayer Cc: Mauro Carvalho Chehab Cc: David Frey Cc: Mark Rutland Cc: Ingo Molnar Cc: Christophe JAILLET Cc: linux-edac@vger.kernel.org Link: https://lkml.kernel.org/r/20180927100949.973078-1-arnd@arndb.de --- drivers/edac/altera_edac.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 523ae84a4dd8..4324a00f3c27 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1035,7 +1035,7 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, } else { struct device_node *sysmgr_np; struct resource res; - void __iomem *base; + uintptr_t base; sysmgr_np = of_parse_phandle(np_eccmgr, "altr,sysmgr-syscon", 0); @@ -1049,9 +1049,9 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, return -ENOMEM; /* Need physical address for SMCC call */ - base = (void __iomem *)res.start; + base = res.start; - ecc_mgr_map = regmap_init(NULL, NULL, base, + ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, &s10_sdram_regmap_cfg); } of_node_put(np_eccmgr); @@ -2044,7 +2044,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev) } else { struct device_node *sysmgr_np; struct resource res; - void __iomem *base; + uintptr_t base; sysmgr_np = of_parse_phandle(pdev->dev.of_node, "altr,sysmgr-syscon", 0); @@ -2058,9 +2058,10 @@ static int altr_edac_a10_probe(struct platform_device *pdev) return -ENOMEM; /* Need physical address for SMCC call */ - base = (void __iomem *)res.start; + base = res.start; - edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, base, + edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, + (void *)base, &s10_sdram_regmap_cfg); } -- cgit v1.2.3 From 432de7fd7630c84ad24f1c2acd1e3bb4ce3741ca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Sep 2018 14:39:34 -0700 Subject: EDAC, {i7core,sb,skx}_edac: Fix uncorrected error counting The count of errors is picked up from bits 52:38 of the machine check bank status register. But this is the count of *corrected* errors. If an uncorrected error is being logged, the h/w sets this field to 0. Which means that when edac_mc_handle_error() is called, the EDAC core will carefully add zero to the appropriate uncorrected error counts. Signed-off-by: Tony Luck [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Aristeu Rozanski Cc: Mauro Carvalho Chehab Cc: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20180928213934.19890-1-tony.luck@intel.com --- drivers/edac/i7core_edac.c | 1 + drivers/edac/sb_edac.c | 1 + drivers/edac/skx_edac.c | 1 + 3 files changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 943aab62dea1..9ef448fef12f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1711,6 +1711,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, u32 errnum = find_first_bit(&error, 32); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) tp_event = HW_EVENT_ERR_FATAL; else diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index cacd0ca12af6..9353c3fc7c05 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2982,6 +2982,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index b2987efa2c6d..e840412abe4e 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -959,6 +959,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, recoverable = GET_BITFIELD(m->status, 56, 56); if (uncorrected_error) { + core_err_cnt = 1; if (ripv) { type = "FATAL"; tp_event = HW_EVENT_ERR_FATAL; -- cgit v1.2.3 From 8f18973877204dc8ca4ce1004a5d28683b9a7086 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 9 Oct 2018 10:20:25 -0700 Subject: EDAC, skx_edac: Fix logical channel intermediate decoding The code "lchan = (lchan << 1) | ~lchan" for logical channel intermediate decoding is wrong. The wrong intermediate decoding result is {0xffffffff, 0xfffffffe}. Fix it by replacing '~' with '!'. The correct intermediate decoding result is {0x1, 0x2}. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov CC: Aristeu Rozanski CC: Mauro Carvalho Chehab CC: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20181009172025.18594-1-tony.luck@intel.com --- drivers/edac/skx_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index e840412abe4e..dd209e0dd9ab 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -668,7 +668,7 @@ sad_found: break; case 2: lchan = (addr >> shift) % 2; - lchan = (lchan << 1) | ~lchan; + lchan = (lchan << 1) | !lchan; break; case 3: lchan = ((addr >> shift) % 2) << 1; -- cgit v1.2.3 From d8c27ba86a2fd806d3957e5a9b30e66dfca2a61d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 13 Oct 2018 13:28:43 +0300 Subject: EDAC, thunderx: Fix memory leak in thunderx_l2c_threaded_isr() Fix memory leak in L2c threaded interrupt handler. [ bp: Rewrite commit message. ] Fixes: 41003396f932 ("EDAC, thunderx: Add Cavium ThunderX EDAC driver") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov CC: David Daney CC: Jan Glauber CC: Mauro Carvalho Chehab CC: Sergey Temerkhanov CC: linux-edac Link: http://lkml.kernel.org/r/20181013102843.GG16086@mwanda --- drivers/edac/thunderx_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index c009d94f40c5..34be60fe6892 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1884,7 +1884,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) default: dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", l2c->pdev->device); - return IRQ_NONE; + goto err_free; } while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, @@ -1906,7 +1906,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) l2c->ring_tail++; } - return IRQ_HANDLED; + ret = IRQ_HANDLED; err_free: kfree(other); -- cgit v1.2.3 From 4cf841e398503990df640f7a7c5b2ea56f11c08c Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 15 Oct 2018 16:11:31 -0700 Subject: ACPI/ADXL: Add address translation interface using an ACPI DSM Some new Intel servers provide an interface so that the OS can ask the BIOS to translate a system physical address to a memory address (socket, memory controller, channel, rank, dimm, etc.). This is useful for EDAC drivers that want to take the address of an error reported in a machine check bank and let the user know which DIMM may need to be replaced. Specification for this interface is available at: https://cdrdv2.intel.com/v1/dl/getContent/603354 [ Based on earlier code by Qiuxu Zhuo . ] [ bp: Make the first pr_info() in adxl_init() pr_debug() so that it doesn't pollute every dmesg. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Rafael J. Wysocki Tested-by: Qiuxu Zhuo CC: Len Brown CC: linux-acpi@vger.kernel.org CC: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20181015202620.23610-1-tony.luck@intel.com --- drivers/acpi/Kconfig | 3 + drivers/acpi/Makefile | 3 + drivers/acpi/acpi_adxl.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/adxl.h | 13 ++++ 4 files changed, 211 insertions(+) create mode 100644 drivers/acpi/acpi_adxl.c create mode 100644 include/linux/adxl.h (limited to 'drivers') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index dd1eea90f67f..09991cc91b89 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -498,6 +498,9 @@ config ACPI_EXTLOG driver adds support for that functionality with corresponding tracepoint which carries that information to userspace. +config ACPI_ADXL + bool + menuconfig PMIC_OPREGION bool "PMIC (Power Management Integrated Circuit) operation region support" help diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 6d59aa109a91..edc039313cd6 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -61,6 +61,9 @@ acpi-$(CONFIG_ACPI_LPIT) += acpi_lpit.o acpi-$(CONFIG_ACPI_GENERIC_GSI) += irq.o acpi-$(CONFIG_ACPI_WATCHDOG) += acpi_watchdog.o +# Address translation +acpi-$(CONFIG_ACPI_ADXL) += acpi_adxl.o + # These are (potentially) separate modules # IPMI may be used by other drivers, so it has to initialise before them diff --git a/drivers/acpi/acpi_adxl.c b/drivers/acpi/acpi_adxl.c new file mode 100644 index 000000000000..13c8f7b50c46 --- /dev/null +++ b/drivers/acpi/acpi_adxl.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Address translation interface via ACPI DSM. + * Copyright (C) 2018 Intel Corporation + * + * Specification for this interface is available at: + * + * https://cdrdv2.intel.com/v1/dl/getContent/603354 + */ + +#include +#include + +#define ADXL_REVISION 0x1 +#define ADXL_IDX_GET_ADDR_PARAMS 0x1 +#define ADXL_IDX_FORWARD_TRANSLATE 0x2 +#define ACPI_ADXL_PATH "\\_SB.ADXL" + +/* + * The specification doesn't provide a limit on how many + * components are in a memory address. But since we allocate + * memory based on the number the BIOS tells us, we should + * defend against insane values. + */ +#define ADXL_MAX_COMPONENTS 500 + +#undef pr_fmt +#define pr_fmt(fmt) "ADXL: " fmt + +static acpi_handle handle; +static union acpi_object *params; +static const guid_t adxl_guid = + GUID_INIT(0xAA3C050A, 0x7EA4, 0x4C1F, + 0xAF, 0xDA, 0x12, 0x67, 0xDF, 0xD3, 0xD4, 0x8D); + +static int adxl_count; +static char **adxl_component_names; + +static union acpi_object *adxl_dsm(int cmd, union acpi_object argv[]) +{ + union acpi_object *obj, *o; + + obj = acpi_evaluate_dsm_typed(handle, &adxl_guid, ADXL_REVISION, + cmd, argv, ACPI_TYPE_PACKAGE); + if (!obj) { + pr_info("DSM call failed for cmd=%d\n", cmd); + return NULL; + } + + if (obj->package.count != 2) { + pr_info("Bad pkg count %d\n", obj->package.count); + goto err; + } + + o = obj->package.elements; + if (o->type != ACPI_TYPE_INTEGER) { + pr_info("Bad 1st element type %d\n", o->type); + goto err; + } + if (o->integer.value) { + pr_info("Bad ret val %llu\n", o->integer.value); + goto err; + } + + o = obj->package.elements + 1; + if (o->type != ACPI_TYPE_PACKAGE) { + pr_info("Bad 2nd element type %d\n", o->type); + goto err; + } + return obj; + +err: + ACPI_FREE(obj); + return NULL; +} + +/** + * adxl_get_component_names - get list of memory component names + * Returns NULL terminated list of string names + * + * Give the caller a pointer to the list of memory component names + * e.g. { "SystemAddress", "ProcessorSocketId", "ChannelId", ... NULL } + * Caller should count how many strings in order to allocate a buffer + * for the return from adxl_decode(). + */ +const char * const *adxl_get_component_names(void) +{ + return (const char * const *)adxl_component_names; +} +EXPORT_SYMBOL_GPL(adxl_get_component_names); + +/** + * adxl_decode - ask BIOS to decode a system address to memory address + * @addr: the address to decode + * @component_values: pointer to array of values for each component + * Returns 0 on success, negative error code otherwise + * + * The index of each value returned in the array matches the index of + * each component name returned by adxl_get_component_names(). + * Components that are not defined for this address translation (e.g. + * mirror channel number for a non-mirrored address) are set to ~0ull. + */ +int adxl_decode(u64 addr, u64 component_values[]) +{ + union acpi_object argv4[2], *results, *r; + int i, cnt; + + if (!adxl_component_names) + return -EOPNOTSUPP; + + argv4[0].type = ACPI_TYPE_PACKAGE; + argv4[0].package.count = 1; + argv4[0].package.elements = &argv4[1]; + argv4[1].integer.type = ACPI_TYPE_INTEGER; + argv4[1].integer.value = addr; + + results = adxl_dsm(ADXL_IDX_FORWARD_TRANSLATE, argv4); + if (!results) + return -EINVAL; + + r = results->package.elements + 1; + cnt = r->package.count; + if (cnt != adxl_count) { + ACPI_FREE(results); + return -EINVAL; + } + r = r->package.elements; + + for (i = 0; i < cnt; i++) + component_values[i] = r[i].integer.value; + + ACPI_FREE(results); + + return 0; +} +EXPORT_SYMBOL_GPL(adxl_decode); + +static int __init adxl_init(void) +{ + char *path = ACPI_ADXL_PATH; + union acpi_object *p; + acpi_status status; + int i; + + status = acpi_get_handle(NULL, path, &handle); + if (ACPI_FAILURE(status)) { + pr_debug("No ACPI handle for path %s\n", path); + return -ENODEV; + } + + if (!acpi_has_method(handle, "_DSM")) { + pr_info("No DSM method\n"); + return -ENODEV; + } + + if (!acpi_check_dsm(handle, &adxl_guid, ADXL_REVISION, + ADXL_IDX_GET_ADDR_PARAMS | + ADXL_IDX_FORWARD_TRANSLATE)) { + pr_info("DSM method does not support forward translate\n"); + return -ENODEV; + } + + params = adxl_dsm(ADXL_IDX_GET_ADDR_PARAMS, NULL); + if (!params) { + pr_info("Failed to get component names\n"); + return -ENODEV; + } + + p = params->package.elements + 1; + adxl_count = p->package.count; + if (adxl_count > ADXL_MAX_COMPONENTS) { + pr_info("Insane number of address component names %d\n", adxl_count); + ACPI_FREE(params); + return -ENODEV; + } + p = p->package.elements; + + /* + * Allocate one extra for NULL termination. + */ + adxl_component_names = kcalloc(adxl_count + 1, sizeof(char *), GFP_KERNEL); + if (!adxl_component_names) { + ACPI_FREE(params); + return -ENOMEM; + } + + for (i = 0; i < adxl_count; i++) + adxl_component_names[i] = p[i].string.pointer; + + return 0; +} +subsys_initcall(adxl_init); diff --git a/include/linux/adxl.h b/include/linux/adxl.h new file mode 100644 index 000000000000..2a629acb4c3f --- /dev/null +++ b/include/linux/adxl.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Address translation interface via ACPI DSM. + * Copyright (C) 2018 Intel Corporation + */ + +#ifndef _LINUX_ADXL_H +#define _LINUX_ADXL_H + +const char * const *adxl_get_component_names(void); +int adxl_decode(u64 addr, u64 component_values[]); + +#endif /* _LINUX_ADXL_H */ -- cgit v1.2.3