Diffstat (limited to 'arch/x86/kernel/cpu/mce/severity.c')
-rw-r--r-- | arch/x86/kernel/cpu/mce/severity.c | 102
1 file changed, 89 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e1da619add19..83df991314c5 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -9,9 +9,14 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/debugfs.h>
-#include <asm/mce.h>
 #include <linux/uaccess.h>
 
+#include <asm/mce.h>
+#include <asm/intel-family.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+
 #include "internal.h"
 
 /*
@@ -40,9 +45,14 @@ static struct severity {
 	unsigned char context;
 	unsigned char excp;
 	unsigned char covered;
+	unsigned char cpu_model;
+	unsigned char cpu_minstepping;
+	unsigned char bank_lo, bank_hi;
 	char *msg;
 } severities[] = {
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
+#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
 #define KERNEL		.context = IN_KERNEL
 #define USER		.context = IN_USER
 #define KERNEL_RECOV	.context = IN_KERNEL_RECOV
@@ -90,14 +100,9 @@ static struct severity {
 		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
 		),
 	MCESEV(
-		DEFERRED, "Deferred error",
-		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
-		),
-	MCESEV(
 		KEEP, "Corrected error",
 		NOSER, BITCLR(MCI_STATUS_UC)
 		),
-
 	/*
 	 * known AO MCACODs reported via MCE or CMC:
 	 *
@@ -113,6 +118,18 @@ static struct severity {
 		AO, "Action optional: last level cache writeback error",
 		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
 		),
+	/*
+	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
+	 * to report uncorrected errors using CMCI with a special signature.
+	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
+	 * in one of the memory controller banks.
+	 * Set severity to "AO" for same action as normal patrol scrub error.
+	 */
+	MCESEV(
+		AO, "Uncorrected Patrol Scrub Error",
+		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
+		MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
+		),
 
 	/* ignore OVER for UCNA */
 	MCESEV(
@@ -198,6 +215,47 @@ static struct severity {
 #define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
 				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
 
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+	u8 insn_buf[MAX_INSN_SIZE];
+	struct insn insn;
+	unsigned long addr;
+
+	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+		return false;
+
+	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+	insn_get_opcode(&insn);
+	if (!insn.opcode.got)
+		return false;
+
+	switch (insn.opcode.value) {
+	/* MOV mem,reg */
+	case 0x8A: case 0x8B:
+	/* MOVZ mem,reg */
+	case 0xB60F: case 0xB70F:
+		insn_get_modrm(&insn);
+		insn_get_sib(&insn);
+		if (!insn.modrm.got || !insn.sib.got)
+			return false;
+		addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+		break;
+	/* REP MOVS */
+	case 0xA4: case 0xA5:
+		addr = regs->si;
+		break;
+	default:
+		return false;
+	}
+
+	if (fault_in_kernel_space(addr))
+		return false;
+
+	current->mce_vaddr = (void __user *)addr;
+
+	return true;
+}
+
 /*
  * If mcgstatus indicated that ip/cs on the stack were
  * no good, then "m->cs" will be zero and we will have
@@ -209,15 +267,25 @@ static struct severity {
  * distinguish an exception taken in user from from one
  * taken in the kernel.
  */
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
 {
+	enum handler_type t;
+
 	if ((m->cs & 3) == 3)
 		return IN_USER;
+	if (!mc_recoverable(m->mcgstatus))
+		return IN_KERNEL;
 
-	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+	t = ex_get_fault_handler_type(m->ip);
+	if (t == EX_HANDLER_FAULT) {
 		m->kflags |= MCE_IN_KERNEL_RECOV;
 		return IN_KERNEL_RECOV;
 	}
+	if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
+		m->kflags |= MCE_IN_KERNEL_RECOV;
+		m->kflags |= MCE_IN_KERNEL_COPYIN;
+		return IN_KERNEL_RECOV;
+	}
 
 	return IN_KERNEL;
 }
@@ -253,9 +321,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
  */
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+			    char **msg, bool is_excp)
 {
-	enum context ctx = error_context(m);
+	enum context ctx = error_context(m, regs);
 
 	/* Processor Context Corrupt, no need to fumble too much, die! */
 	if (m->status & MCI_STATUS_PCC)
@@ -305,10 +374,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
 	return MCE_KEEP_SEVERITY;
 }
 
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+			      int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
-	enum context ctx = error_context(m);
+	enum context ctx = error_context(m, regs);
 	struct severity *s;
 
 	for (s = severities;; s++) {
@@ -324,6 +394,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
 			continue;
 		if (s->excp && excp != s->excp)
 			continue;
+		if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
+			continue;
+		if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
+			continue;
+		if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
+			continue;
 		if (msg)
 			*msg = s->msg;
 		s->covered = 1;
@@ -336,7 +412,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
 }
 
 /* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
	mce_severity_intel;
 
 void __init mcheck_vendor_init_severity(void)
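
Note on the new severity-table fields: the sketch below is a stand-alone, user-space illustration (not kernel code) of how a rule narrowed by MODEL_STEPPING()/BANK_RANGE() matches the Skylake-X patrol scrub signature, mirroring the extra cpu_model/cpu_minstepping/bank_lo checks the diff adds to mce_severity_intel(). The MCi_STATUS bit positions and the INTEL_FAM6_SKYLAKE_X model number (0x55) come from the kernel sources; the names fake_rule, fake_mce and rule_matches() are invented for this example only.

/*
 * User-space sketch only: a severity rule matches when
 * (status & mask) == result AND, if set, the CPU model, minimum
 * stepping and bank range also match.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)		(1ULL << (n))
#define MCI_STATUS_UC		BIT_ULL(61)
#define MCI_STATUS_MISCV	BIT_ULL(59)
#define MCI_STATUS_ADDRV	BIT_ULL(58)
#define MCI_ADDR		(MCI_STATUS_ADDRV | MCI_STATUS_MISCV)

struct fake_rule {				/* invented name */
	uint64_t mask, result;
	uint8_t cpu_model, cpu_minstepping;
	uint8_t bank_lo, bank_hi;
};

struct fake_mce {				/* invented name */
	uint64_t status;
	int bank;
};

/* Skylake-X patrol scrub quirk from the diff: UC=0, MSCOD=0x0010,
 * MCACOD=000X 0000 1100 XXXX, memory controller banks 13-18, stepping >= 4. */
static const struct fake_rule patrol_scrub = {
	.mask		 = MCI_STATUS_UC | MCI_ADDR | 0xffffeff0,
	.result		 = MCI_ADDR | 0x001000c0,
	.cpu_model	 = 0x55,		/* INTEL_FAM6_SKYLAKE_X */
	.cpu_minstepping = 4,
	.bank_lo	 = 13,
	.bank_hi	 = 18,
};

static bool rule_matches(const struct fake_rule *r, const struct fake_mce *m,
			 uint8_t model, uint8_t stepping)
{
	if ((m->status & r->mask) != r->result)
		return false;
	if (r->cpu_model && model != r->cpu_model)
		return false;
	if (r->cpu_minstepping && stepping < r->cpu_minstepping)
		return false;
	if (r->bank_lo && (m->bank < r->bank_lo || m->bank > r->bank_hi))
		return false;
	return true;
}

int main(void)
{
	/* ADDRV+MISCV set, UC clear, MSCOD=0x0010, MCACOD=0x10c3 ("X" bits set). */
	struct fake_mce m = { .status = MCI_ADDR | 0x001010c3, .bank = 15 };

	printf("SKX stepping 4, bank 15: %d\n",
	       rule_matches(&patrol_scrub, &m, 0x55, 4));	/* prints 1 */
	printf("SKX stepping 3, bank 15: %d\n",
	       rule_matches(&patrol_scrub, &m, 0x55, 3));	/* prints 0 */
	return 0;
}

With the example status, status & mask reduces to MCI_ADDR | 0x001000c0, so the rule matches on stepping 4 but is rejected on stepping 3 by the cpu_minstepping check, which is the same ordering of tests the kernel loop uses.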