diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2009-07-24 15:51:42 +0400 |
---|---|---|
committer | Borislav Petkov <borislav.petkov@amd.com> | 2009-09-14 20:59:17 +0400 |
commit | 549d042df240dfb4203bab40ad44f9336751b7d6 (patch) | |
tree | af357ed8eaf06c26f19d458686b6c7ea4e425a05 /drivers/edac/edac_mce_amd.c | |
parent | ecaf5606de65cdd04de5f526185fe28fb0df654e (diff) | |
download | linux-549d042df240dfb4203bab40ad44f9336751b7d6.tar.xz |
x86, mce: pass mce info to EDAC for decoding
Move NB decoder along with required defines to EDAC MCE core. Add
registration routines for further decoding of the MCE info in the AMD64
EDAC module.
CC: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac/edac_mce_amd.c')
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 918567e8cfd5..444c2cc4472d 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -1,6 +1,31 @@ #include <linux/module.h> #include "edac_mce_amd.h" +static bool report_gart_errors; +static void (*nb_bus_decoder)(int node_id, struct err_regs *regs, int ecc_type); + +void amd_report_gart_errors(bool v) +{ + report_gart_errors = v; +} +EXPORT_SYMBOL_GPL(amd_report_gart_errors); + +void amd_register_ecc_decoder(void (*f)(int, struct err_regs *, int)) +{ + nb_bus_decoder = f; +} +EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); + +void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *, int)) +{ + if (nb_bus_decoder) { + WARN_ON(nb_bus_decoder != f); + + nb_bus_decoder = NULL; + } +} +EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); + /* * string representation for the different MCA reported error types, see F3x48 * or MSR0000_0411. @@ -102,3 +127,93 @@ const char *ext_msgs[] = { "Probe Filter error" /* 1_1111b */ }; EXPORT_SYMBOL_GPL(ext_msgs); + +void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors) +{ + int ecc; + u32 ec = ERROR_CODE(regs->nbsl); + u32 xec = EXT_ERROR_CODE(regs->nbsl); + + if (!handle_errors) + return; + + pr_emerg(" Northbridge Error, node %d", node_id); + + /* + * F10h, revD can disable ErrCpu[3:0] so check that first and also the + * value encoding has changed so interpret those differently + */ + if ((boot_cpu_data.x86 == 0x10) && + (boot_cpu_data.x86_model > 8)) { + if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) + pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); + } else { + pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf))); + } + + pr_emerg(" Error: %sorrected", + ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C")); + pr_cont(", Report Error: %s", + ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no")); + pr_cont(", MiscV: %svalid, CPU context corrupt: %s", + ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"), + ((regs->nbsh & K8_NBSH_PCC) ? "yes" : "no")); + + /* do the two bits[14:13] together */ + ecc = regs->nbsh & (0x3 << 13); + if (ecc) + pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); + + pr_cont("\n"); + + if (TLB_ERROR(ec)) { + /* + * GART errors are intended to help graphics driver developers + * to detect bad GART PTEs. It is recommended by AMD to disable + * GART table walk error reporting by default[1] (currently + * being disabled in mce_cpu_quirks()) and according to the + * comment in mce_cpu_quirks(), such GART errors can be + * incorrectly triggered. We may see these errors anyway and + * unless requested by the user, they won't be reported. + * + * [1] section 13.10.1 on BIOS and Kernel Developers Guide for + * AMD NPT family 0Fh processors + */ + if (!report_gart_errors) + return; + + pr_emerg(" GART TLB error, Transaction: %s, Cache Level %s\n", + TT_MSG(ec), LL_MSG(ec)); + } else if (MEM_ERROR(ec)) { + pr_emerg(" Memory/Cache error, Transaction: %s, Type: %s," + " Cache Level: %s", + RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); + } else if (BUS_ERROR(ec)) { + pr_emerg(" Bus (Link/DRAM) error\n"); + if (nb_bus_decoder) + nb_bus_decoder(node_id, regs, ecc); + } else { + /* shouldn't reach here! */ + pr_warning("%s: unknown MCE error 0x%x\n", __func__, ec); + } + + pr_emerg("%s.\n", EXT_ERR_MSG(xec)); +} +EXPORT_SYMBOL_GPL(amd_decode_nb_mce); + +void decode_mce(struct mce *m) +{ + struct err_regs regs; + int node; + + if (m->bank != 4) + return; + + regs.nbsl = (u32) m->status; + regs.nbsh = (u32)(m->status >> 32); + regs.nbeal = (u32) m->addr; + regs.nbeah = (u32)(m->addr >> 32); + node = topology_cpu_node_id(m->extcpu); + + amd_decode_nb_mce(node, ®s, 1); +} |