summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/boot/header.S14
-rw-r--r--arch/x86/boot/setup.ld6
-rw-r--r--arch/x86/entry/entry.S23
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/include/asm/coco.h5
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/nospec-branch.h25
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/special_insns.h2
-rw-r--r--arch/x86/include/asm/uaccess.h10
-rw-r--r--arch/x86/include/asm/vsyscall.h10
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/fpu/signal.c13
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kvm/hyperv.c50
-rw-r--r--arch/x86/kvm/hyperv.h3
-rw-r--r--arch/x86/kvm/svm/svm_ops.h6
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S9
-rw-r--r--arch/x86/kvm/vmx/vmx.c24
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h6
-rw-r--r--arch/x86/kvm/x86.c27
-rw-r--r--arch/x86/lib/getuser.S24
-rw-r--r--arch/x86/lib/putuser.S20
-rw-r--r--arch/x86/mm/fault.c9
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/maccess.c10
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/xen/smp.c12
39 files changed, 279 insertions, 143 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b9224cf2ee4d..2a7279d80460 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -379,7 +379,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2264db14a25d..da8f3caf2781 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
- else
+ else
KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
endif
+ endif
else
BITS := 64
UTS_MACHINE := x86_64
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b2771710ed98..a1bbedd989e4 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -106,8 +106,7 @@ extra_header_fields:
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
- .long setup_size + ZO__end + pecompat_vsize
- # SizeOfImage
+ .long setup_size + ZO__end # SizeOfImage
.long salign # SizeOfHeaders
.long 0 # CheckSum
@@ -143,7 +142,7 @@ section_table:
.ascii ".setup"
.byte 0
.byte 0
- .long setup_size - salign # VirtualSize
+ .long pecompat_fstart - salign # VirtualSize
.long salign # VirtualAddress
.long pecompat_fstart - salign # SizeOfRawData
.long salign # PointerToRawData
@@ -156,8 +155,8 @@ section_table:
#ifdef CONFIG_EFI_MIXED
.asciz ".compat"
- .long 8 # VirtualSize
- .long setup_size + ZO__end # VirtualAddress
+ .long pecompat_fsize # VirtualSize
+ .long pecompat_fstart # VirtualAddress
.long pecompat_fsize # SizeOfRawData
.long pecompat_fstart # PointerToRawData
@@ -172,17 +171,16 @@ section_table:
* modes this image supports.
*/
.pushsection ".pecompat", "a", @progbits
- .balign falign
- .set pecompat_vsize, salign
+ .balign salign
.globl pecompat_fstart
pecompat_fstart:
.byte 0x1 # Version
.byte 8 # Size
.word IMAGE_FILE_MACHINE_I386 # PE machine type
.long setup_size + ZO_efi32_pe_entry # Entrypoint
+ .byte 0x0 # Sentinel
.popsection
#else
- .set pecompat_vsize, 0
.set pecompat_fstart, setup_size
#endif
.ascii ".text"
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 83bb7efad8ae..3a2d1360abb0 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -24,6 +24,9 @@ SECTIONS
.text : { *(.text .text.*) }
.text32 : { *(.text32) }
+ .pecompat : { *(.pecompat) }
+ PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
+
. = ALIGN(16);
.rodata : { *(.rodata*) }
@@ -36,9 +39,6 @@ SECTIONS
. = ALIGN(16);
.data : { *(.data*) }
- .pecompat : { *(.pecompat) }
- PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
-
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f0cb1d..003379049924 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c73047bf9f4b..fba427646805 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +955,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c40f89ab1b4c..9bb485977629 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -161,6 +161,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
.Lswapgs_and_iret:
swapgs
+ CLEAR_CPU_BUFFERS
/* Assert that the IRET frame indicates user mode. */
testb $3, 8(%rsp)
jnz .Lnative_iret
@@ -723,6 +725,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -1450,6 +1454,12 @@ nmi_restore:
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
+ /*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
@@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_END_OF_STACK
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(entry_SYSCALL32_ignore)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index de94e2e84ecc..eabf48c4d4b4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 6ae2d16a7613..76c310b19b11 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -10,13 +10,14 @@ enum cc_vendor {
CC_VENDOR_INTEL,
};
-extern enum cc_vendor cc_vendor;
-
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+extern enum cc_vendor cc_vendor;
void cc_set_mask(u64 mask);
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
+#define cc_vendor (CC_VENDOR_NONE)
+
static inline u64 cc_mkenc(u64 val)
{
return val;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a26bebbdff87..a1273698fc43 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fdf723b6f6d0..2b62cdd8dd12 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -95,7 +95,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce8f50192ae3..7e523bb3d2d3 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 071572e23d3a..cbbef32517f0 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ l_yes:
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5b2d0fde579..d271ba20a0b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1145,6 +1145,8 @@ struct kvm_hv {
unsigned int synic_auto_eoi_used;
struct kvm_hv_syndbg hv_syndbg;
+
+ bool xsaves_xsavec_checked;
};
#endif
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 262e65539f83..2aa52cab1e46 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,17 @@
#endif
.endm
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel;
+
#include <asm/segment.h>
/**
@@ -562,17 +574,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 4b081e0d3306..363266cbcada 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -13,7 +13,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
+ asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d6cd9344f6c7..48f8dd47cf68 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], (%[addr])\n"
_ASM_EXTABLE(1b, %l[fail])
:: [addr] "r" (addr), [val] "r" (val)
:: fail);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5c367c1290c3..237dc8cdd12b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -133,7 +133,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -295,7 +295,7 @@ do { \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@@ -375,7 +375,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -394,7 +394,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index ab60a71a8dcb..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
}
#endif
+/*
+ * The (legacy) vsyscall page is the long page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+ return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bb0ab8466b91..48d049cd74e7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
static_branch_enable(&mmio_stale_data_clear);
@@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbde5b..247f2225aa9f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ retry:
if (ret != X86_TRAP_PF)
return false;
- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;
if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;
fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}
/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dfe9945b9bec..428ee74002e1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -434,7 +434,8 @@ static void __init sev_map_percpu_data(void)
{
int cpu;
- if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ if (cc_vendor != CC_VENDOR_AMD ||
+ !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
for_each_possible_cpu(cpu) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 17e955ab69fe..3082cf24b69e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -563,9 +563,6 @@ nmi_restart:
}
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}
#if IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4943f6b2bbee..8a47f8541eab 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1322,6 +1322,56 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
return false;
}
+#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
+#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */
+
+/*
+ * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
+ * configuration.
+ * Such configuration can result from, for example, AMD Erratum 1386 workaround.
+ *
+ * Print a notice so users aren't left wondering what's suddenly gone wrong.
+ */
+static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = to_kvm_hv(kvm);
+
+ /* Check again under the hv_lock. */
+ if (hv->xsaves_xsavec_checked)
+ return;
+
+ if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
+ KVM_HV_WIN2016_GUEST_ID)
+ return;
+
+ hv->xsaves_xsavec_checked = true;
+
+ /* UP configurations aren't affected */
+ if (atomic_read(&kvm->online_vcpus) < 2)
+ return;
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
+ return;
+
+ pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
+ "If it fails to boot try disabling XSAVEC in the VM config.\n");
+}
+
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+
+ if (!vcpu->arch.hyperv_enabled ||
+ hv->xsaves_xsavec_checked)
+ return;
+
+ mutex_lock(&hv->hv_lock);
+ __kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+ mutex_unlock(&hv->hv_lock);
+}
+
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
bool host)
{
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 1dc0b6604526..923e64903da9 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -182,6 +182,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
void kvm_hv_request_tsc_page_update(struct kvm *kvm);
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu);
+
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
@@ -267,6 +269,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock) {}
static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
+static inline void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) {}
static inline void kvm_hv_init_vm(struct kvm *kvm) {}
static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..4e725854c63a 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@
#define svm_asm(insn, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) "\n\t" \
+ asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@@ -18,7 +18,7 @@ fault: \
#define svm_asm1(insn, op1, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@@ -28,7 +28,7 @@ fault: \
#define svm_asm2(insn, op1, op2, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a6216c874729..315c7c2ba89b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -71,7 +71,7 @@ static int fixed_pmc_events[] = {
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;
pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc189..6a9bfdfbb6e5 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 906ecd001511..2bfbf758d061 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- test $VMX_RUN_VMRESUME, %ebx
+ bt $VMX_RUN_VMRESUME_SHIFT, %ebx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e262bc2ba4e5..88a4ff200d04 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ /*
+ * Disable VERW's behavior of clearing CPU buffers for the guest if the
+ * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+ * the mitigation. Disabling the clearing behavior provides a
+ * performance boost for guests that aren't aware that manually clearing
+ * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+ * and VM-Exit.
+ */
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
@@ -738,7 +747,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
*/
static int kvm_cpu_vmxoff(void)
{
- asm_volatile_goto("1: vmxoff\n\t"
+ asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
@@ -2784,7 +2793,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
cr4_set_bits(X86_CR4_VMXE);
- asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
_ASM_EXTABLE(1b, %l[fault])
: : [vmxon_pointer] "m"(vmxon_pointer)
: : fault);
@@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_enter_irqoff();
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index f41ce3c24123..8060e5fc6dbd 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
- asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"
_ASM_EXTABLE(1b, %l[do_exception])
@@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@@ -205,7 +205,7 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 363b1c080205..48a61d283406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1704,22 +1704,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
struct kvm_msr_entry msr;
int r;
+ /* Unconditionally clear the output for simplicity */
+ msr.data = 0;
msr.index = index;
r = kvm_get_msr_feature(&msr);
- if (r == KVM_MSR_RET_INVALID) {
- /* Unconditionally clear the output for simplicity */
- *data = 0;
- if (kvm_msr_ignored_check(index, 0, false))
- r = 0;
- }
-
- if (r)
- return r;
+ if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+ r = 0;
*data = msr.data;
- return 0;
+ return r;
}
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1782,6 +1777,10 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
+ if (!static_cpu_has(X86_FEATURE_XSAVES) &&
+ (efer & EFER_SVME))
+ kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+
return 0;
}
@@ -2507,7 +2506,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
}
#ifdef CONFIG_X86_64
-static inline int gtod_is_based_on_tsc(int mode)
+static inline bool gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
@@ -5454,7 +5453,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ if (events->nmi.pending)
+ kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
@@ -7016,6 +7016,9 @@ set_identity_unlock:
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
+ r = -ENOENT;
+ if (!pic_in_kernel(kvm))
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
if (kvm->arch.vpit)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 20ef350a60fb..10d5ed8b5990 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception)
#endif
/* get_user */
- _ASM_EXTABLE(1b, __get_user_handle_exception)
- _ASM_EXTABLE(2b, __get_user_handle_exception)
- _ASM_EXTABLE(3b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(4b, __get_user_8_handle_exception)
- _ASM_EXTABLE(5b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception)
#endif
/* __get_user */
- _ASM_EXTABLE(6b, __get_user_handle_exception)
- _ASM_EXTABLE(7b, __get_user_handle_exception)
- _ASM_EXTABLE(8b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(9b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(9b, __get_user_8_handle_exception)
- _ASM_EXTABLE(10b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 2877f5934177..975c9c18263d 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -133,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception)
RET
SYM_CODE_END(__put_user_handle_exception)
- _ASM_EXTABLE(1b, __put_user_handle_exception)
- _ASM_EXTABLE(2b, __put_user_handle_exception)
- _ASM_EXTABLE(3b, __put_user_handle_exception)
- _ASM_EXTABLE(4b, __put_user_handle_exception)
- _ASM_EXTABLE(5b, __put_user_handle_exception)
- _ASM_EXTABLE(6b, __put_user_handle_exception)
- _ASM_EXTABLE(7b, __put_user_handle_exception)
- _ASM_EXTABLE(9b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(5b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __put_user_handle_exception)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(8b, __put_user_handle_exception)
- _ASM_EXTABLE(10b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(10b, __put_user_handle_exception)
#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..d6375b3c633b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
- return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..f50cc210a981 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 6993f026adec..42115ac079cf 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -3,6 +3,8 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+
#ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
@@ -16,6 +18,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return false;
/*
+ * Reading from the vsyscall page may cause an unhandled fault in
+ * certain cases. Though it is at an address above TASK_SIZE_MAX, it is
+ * usually considered as a user space address.
+ */
+ if (is_vsyscall_vaddr(vaddr))
+ return false;
+
+ /*
* Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early
* exception handlers.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index adc497b93f03..65e9a6e391c0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;
- return ma->start - mb->start;
+ return (ma->start > mb->start) - (ma->start < mb->start);
}
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
- * Find and extend numa_meminfo memblks to cover the @start-@end
- * physical address range, such that the first memblk includes
- * @start, the last memblk includes @end, and any gaps in between
- * are filled.
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
*
* RETURNS:
* 0 : Success
- * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/
int __init numa_fill_memblks(u64 start, u64 end)
@@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
/*
* Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. Exclude (start == bi->end) since
- * end addresses in both a CFMWS range and a memblk range
- * are exclusive.
- *
- * This list of pointers is used to make in-place changes
- * that fill out the numa_meminfo memblks.
+ * overlap start, end. The list is used to make in-place
+ * changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- if (start < bi->end && end >= bi->start) {
+ if (memblock_addrs_overlap(start, end - start, bi->start,
+ bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4b0d6fff88de..1fb9a1644d94 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ if (!resched_name)
+ goto fail_mem;
per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
@@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_resched_irq, cpu).irq = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
@@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (!xen_fifo_events) {
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ if (!debug_name)
+ goto fail_mem;
+
per_cpu(xen_debug_irq, cpu).name = debug_name;
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
xen_debug_interrupt,
@@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu)
}
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
+
per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
@@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu)
return 0;
+ fail_mem:
+ rc = -ENOMEM;
fail:
xen_smp_intr_free(cpu);
return rc;