diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-13 20:21:00 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-13 20:21:00 +0300 |
commit | a9429089d3e822d45be01a9635f0685174508fd3 (patch) | |
tree | d5483a635cdc4d74967c1ddb383f1e283bf9849c /arch/x86/include | |
parent | 076f14be7fc942e112c94c841baec44124275cd0 (diff) | |
parent | 7ccddc4613db446dc3cbb69a3763ba60ec651d13 (diff) | |
download | linux-a9429089d3e822d45be01a9635f0685174508fd3.tar.xz |
Merge tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Thomas Gleixner:
"RAS updates from Borislav Petkov:
- Unmap a whole guest page if an MCE is encountered in it to avoid
follow-on MCEs leading to the guest crashing, by Tony Luck.
This change collided with the entry changes and the merge
resolution would have been rather unpleasant. To avoid that, the
entry branch was merged in before applying this. The resulting code
did not change over the rebase.
- AMD MCE error thresholding machinery cleanup and hotplug
sanitization, by Thomas Gleixner.
- Change the MCE notifiers to denote whether they have handled the
error and not break the chain early by returning NOTIFY_STOP, thus
giving the opportunity for the later handlers in the chain to see
it. By Tony Luck.
- Add AMD family 0x17, models 0x60-6f support, by Alexander Monakov.
- Last but not least, the usual round of fixes and improvements"
* tag 'ras-core-2020-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
x86/mce/dev-mcelog: Fix -Wstringop-truncation warning about strncpy()
x86/{mce,mm}: Unmap the entire page if the whole page is affected and poisoned
EDAC/amd64: Add AMD family 17h model 60h PCI IDs
hwmon: (k10temp) Add AMD family 17h model 60h PCI match
x86/amd_nb: Add AMD family 17h model 60h PCI IDs
x86/mcelog: Add compat_ioctl for 32-bit mcelog support
x86/mce: Drop bogus comment about mce.kflags
x86/mce: Fixup exception only for the correct MCEs
EDAC: Drop the EDAC report status checks
x86/mce: Add mce=print_all option
x86/mce: Change default MCE logger to check mce->kflags
x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
x86/mce: Add a struct mce.kflags field
x86/mce: Convert the CEC to use the MCE notifier
x86/mce: Rename "first" function as "early"
x86/mce/amd, edac: Remove report_gart_errors
x86/mce/amd: Make threshold bank setting hotplug robust
x86/mce/amd: Cleanup threshold device remove path
x86/mce/amd: Straighten CPU hotplug path
x86/mce/amd: Sanitize thresholding device creation hotplug path
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/amd_nb.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 28 | ||||
-rw-r--r-- | arch/x86/include/asm/set_memory.h | 19 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/mce.h | 1 |
4 files changed, 35 insertions, 14 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index c7df20e78b09..455066a06f60 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -57,6 +57,7 @@ struct threshold_bank { /* initialized to the number of CPUs on the node sharing this bank */ refcount_t cpus; + unsigned int shared; }; struct amd_northbridge { diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a00130112b02..cf503824529c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -127,6 +127,17 @@ #define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define XEC(x, mask) (((x) >> 16) & mask) + +/* mce.kflags flag bits for logging etc. */ +#define MCE_HANDLED_CEC BIT_ULL(0) +#define MCE_HANDLED_UC BIT_ULL(1) +#define MCE_HANDLED_EXTLOG BIT_ULL(2) +#define MCE_HANDLED_NFIT BIT_ULL(3) +#define MCE_HANDLED_EDAC BIT_ULL(4) +#define MCE_HANDLED_MCELOG BIT_ULL(5) +#define MCE_IN_KERNEL_RECOV BIT_ULL(6) + /* * This structure contains all data related to the MCE log. 
Also * carries a signature to make it easier to find from external @@ -142,14 +153,16 @@ struct mce_log_buffer { struct mce entry[]; }; +/* Highest last */ enum mce_notifier_prios { - MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_UC = INT_MAX - 1, - MCE_PRIO_EXTLOG = INT_MAX - 2, - MCE_PRIO_NFIT = INT_MAX - 3, - MCE_PRIO_EDAC = INT_MAX - 4, - MCE_PRIO_MCELOG = 1, - MCE_PRIO_LOWEST = 0, + MCE_PRIO_LOWEST, + MCE_PRIO_MCELOG, + MCE_PRIO_EDAC, + MCE_PRIO_NFIT, + MCE_PRIO_EXTLOG, + MCE_PRIO_UC, + MCE_PRIO_EARLY, + MCE_PRIO_CEC }; struct notifier_block; @@ -347,5 +360,4 @@ umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ec2c0a094b5d..5948218f35c5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. 
We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 955c2a2e1cf9..db9adc081c5a 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -35,6 +35,7 @@ struct mce { __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ __u32 microcode; /* Microcode revision */ + __u64 kflags; /* Internal kernel use */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) |