Diffstat (limited to 'arch/x86/include/asm')
102 files changed, 1698 insertions(+), 2133 deletions(-)
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000000..1b07fb102c4e --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,25 @@ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rbx) +GEN(rcx) +GEN(rdx) +GEN(rsi) +GEN(rdi) +GEN(rbp) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ebx) +GEN(ecx) +GEN(edx) +GEN(esi) +GEN(edi) +GEN(ebp) +#endif diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h deleted file mode 100644 index 4adb13f08af7..000000000000 --- a/arch/x86/include/asm/acrn.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_ACRN_H -#define _ASM_X86_ACRN_H - -extern void acrn_hv_callback_vector(void); -#ifdef CONFIG_TRACING -#define trace_acrn_hv_callback_vector acrn_hv_callback_vector -#endif - -extern void acrn_hv_vector_handler(struct pt_regs *regs); -#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index 8e25bf4f323a..62da760d6d5a 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_AGP_H #define _ASM_X86_AGP_H -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <asm/cacheflush.h> /* diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index c7df20e78b09..455066a06f60 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -57,6 +57,7 @@ struct threshold_bank { /* initialized to the number of CPUs on the node sharing this bank */ refcount_t cpus; + unsigned int shared; }; struct amd_northbridge { diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index 99bb207fc04c..87ce8e963215 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h @@ -25,11 +25,7 @@ #define APBT_MIN_FREQ 1000000 #define APBT_MMAP_SIZE 1024 -#define APBT_DEV_USED 1 - extern void apbt_time_init(void); -extern unsigned long apbt_quick_calibrate(void); -extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); extern void apbt_setup_secondary_clock(void); extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); @@ -38,7 +34,6 @@ extern int sfi_mtimer_num; #else /* CONFIG_APB_TIMER */ -static inline unsigned long apbt_quick_calibrate(void) {return 0; } static inline void apbt_time_init(void) { } #endif diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 19e94af9cc5d..2cc44e957c31 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -519,39 +519,6 @@ static inline bool apic_id_is_primary_thread(unsigned int id) { return false; } static inline void apic_smt_update(void) { } #endif -extern void irq_enter(void); -extern void irq_exit(void); - -static inline void entering_irq(void) -{ - irq_enter(); - kvm_set_cpu_l1tf_flush_l1d(); -} - -static inline void entering_ack_irq(void) -{ - entering_irq(); - ack_APIC_irq(); -} - -static inline void ipi_entering_ack_irq(void) -{ - irq_enter(); - ack_APIC_irq(); - kvm_set_cpu_l1tf_flush_l1d(); -} - -static inline void exiting_irq(void) -{ - irq_exit(); -} - -static inline void exiting_ack_irq(void) -{ - ack_APIC_irq(); - irq_exit(); -} - extern void ioapic_zap_locks(void); #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 7a4bb1bd4bdb..ebc248e49549 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ 
-15,16 +15,6 @@ #define RDRAND_RETRY_LOOPS 10 -#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" -#define RDSEED_INT ".byte 0x0f,0xc7,0xf8" -#ifdef CONFIG_X86_64 -# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" -# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" -#else -# define RDRAND_LONG RDRAND_INT -# define RDSEED_LONG RDSEED_INT -#endif - /* Unconditional execution of RDRAND and RDSEED */ static inline bool __must_check rdrand_long(unsigned long *v) @@ -32,9 +22,9 @@ static inline bool __must_check rdrand_long(unsigned long *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_LONG + asm volatile("rdrand %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -46,9 +36,9 @@ static inline bool __must_check rdrand_int(unsigned int *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_INT + asm volatile("rdrand %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -58,18 +48,18 @@ static inline bool __must_check rdrand_int(unsigned int *v) static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; - asm volatile(RDSEED_LONG + asm volatile("rdseed %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } static inline bool __must_check rdseed_int(unsigned int *v) { bool ok; - asm volatile(RDSEED_INT + asm volatile("rdseed %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ce92c4acc913..5a42f9206138 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <asm/ftrace.h> #include <linux/uaccess.h> +#include <linux/pgtable.h> #include <asm/string.h> #include <asm/page.h> #include <asm/checksum.h> #include <asm-generic/asm-prototypes.h> -#include <asm/pgtable.h> #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> @@ -17,24 +17,19 @@ extern void cmpxchg8b_emu(void); #endif #ifdef CONFIG_RETPOLINE -#ifdef CONFIG_X86_32 -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -#else -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -INDIRECT_THUNK(8) -INDIRECT_THUNK(9) -INDIRECT_THUNK(10) -INDIRECT_THUNK(11) -INDIRECT_THUNK(12) -INDIRECT_THUNK(13) -INDIRECT_THUNK(14) -INDIRECT_THUNK(15) -#endif -INDIRECT_THUNK(ax) -INDIRECT_THUNK(bx) -INDIRECT_THUNK(cx) -INDIRECT_THUNK(dx) -INDIRECT_THUNK(si) -INDIRECT_THUNK(di) -INDIRECT_THUNK(bp) + +#define DECL_INDIRECT_THUNK(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + +#define DECL_RETPOLINE(reg) \ + extern asmlinkage void __x86_retpoline_ ## reg (void); + +#undef GEN +#define GEN(reg) DECL_INDIRECT_THUNK(reg) +#include <asm/GEN-for-each-reg.h> + +#undef GEN +#define GEN(reg) DECL_RETPOLINE(reg) +#include <asm/GEN-for-each-reg.h> + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 115127c7ad28..bf35e476a776 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -28,7 +28,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v) * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, * it's non-inlined function that increases binary 
size and stack usage. */ - return READ_ONCE((v)->counter); + return __READ_ONCE((v)->counter); } /** @@ -40,7 +40,7 @@ static __always_inline int arch_atomic_read(const atomic_t *v) */ static __always_inline void arch_atomic_set(atomic_t *v, int i) { - WRITE_ONCE(v->counter, i); + __WRITE_ONCE(v->counter, i); } /** @@ -166,6 +166,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic_add_return arch_atomic_add_return /** * arch_atomic_sub_return - subtract integer and return @@ -178,34 +179,39 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { return arch_atomic_add_return(-i, v); } +#define arch_atomic_sub_return arch_atomic_sub_return static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } +#define arch_atomic_fetch_add arch_atomic_fetch_add static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } +#define arch_atomic_fetch_sub arch_atomic_fetch_sub static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic_cmpxchg arch_atomic_cmpxchg -#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg -static inline int arch_atomic_xchg(atomic_t *v, int new) +static __always_inline int arch_atomic_xchg(atomic_t *v, int new) { return arch_xchg(&v->counter, new); } +#define arch_atomic_xchg arch_atomic_xchg -static inline void arch_atomic_and(int i, atomic_t *v) +static __always_inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) @@ -213,7 +219,7 @@ static inline void arch_atomic_and(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_and(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -221,8 +227,9 @@ static inline int arch_atomic_fetch_and(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_and arch_atomic_fetch_and -static inline void arch_atomic_or(int i, atomic_t *v) +static __always_inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) @@ -230,7 +237,7 @@ static inline void arch_atomic_or(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_or(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -238,8 +245,9 @@ static inline int arch_atomic_fetch_or(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_or arch_atomic_fetch_or -static inline void arch_atomic_xor(int i, atomic_t *v) +static __always_inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) @@ -247,7 +255,7 @@ static inline void arch_atomic_xor(int i, atomic_t *v) : "memory"); } -static inline int arch_atomic_fetch_xor(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v) { int val = arch_atomic_read(v); @@ -255,6 +263,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) return val; } +#define arch_atomic_fetch_xor arch_atomic_fetch_xor #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> @@ -262,6 +271,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v) 
# include <asm/atomic64_64.h> #endif -#include <asm-generic/atomic-instrumented.h> +#define ARCH_ATOMIC #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 52cfaecb13f9..5efd01b548d1 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -75,6 +75,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg /** * arch_atomic64_xchg - xchg atomic64 variable @@ -94,6 +95,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) : "memory"); return o; } +#define arch_atomic64_xchg arch_atomic64_xchg /** * arch_atomic64_set - set atomic64 variable @@ -138,6 +140,7 @@ static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) ASM_NO_INPUT_CLOBBER("memory")); return i; } +#define arch_atomic64_add_return arch_atomic64_add_return /* * Other variants with different arithmetic operators: @@ -149,6 +152,7 @@ static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) ASM_NO_INPUT_CLOBBER("memory")); return i; } +#define arch_atomic64_sub_return arch_atomic64_sub_return static inline s64 arch_atomic64_inc_return(atomic64_t *v) { @@ -242,6 +246,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) "S" (v) : "memory"); return (int)a; } +#define arch_atomic64_add_unless arch_atomic64_add_unless static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { @@ -281,6 +286,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) return old; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and static inline void arch_atomic64_or(s64 i, atomic64_t *v) { @@ -299,6 +305,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) return old; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { @@ -317,6 +324,7 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) return old; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { @@ -327,6 +335,7 @@ static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) return old; } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add #define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 95c6ceac66b9..809bd010a751 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -19,7 +19,7 @@ */ static inline s64 arch_atomic64_read(const atomic64_t *v) { - return READ_ONCE((v)->counter); + return __READ_ONCE((v)->counter); } /** @@ -31,7 +31,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v) */ static inline void arch_atomic64_set(atomic64_t *v, s64 i) { - WRITE_ONCE(v->counter, i); + __WRITE_ONCE(v->counter, i); } /** @@ -159,37 +159,43 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } +#define arch_atomic64_add_return arch_atomic64_add_return static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } +#define arch_atomic64_sub_return arch_atomic64_sub_return static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } +#define arch_atomic64_fetch_add arch_atomic64_fetch_add static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return 
xadd(&v->counter, -i); } +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { return try_cmpxchg(&v->counter, old, new); } +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } +#define arch_atomic64_xchg arch_atomic64_xchg static inline void arch_atomic64_and(s64 i, atomic64_t *v) { @@ -207,6 +213,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } +#define arch_atomic64_fetch_and arch_atomic64_fetch_and static inline void arch_atomic64_or(s64 i, atomic64_t *v) { @@ -224,6 +231,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } +#define arch_atomic64_fetch_or arch_atomic64_fetch_or static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { @@ -241,5 +249,6 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor #endif /* _ASM_X86_ATOMIC64_64_H */ diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h new file mode 100644 index 000000000000..36aec57ea7a3 --- /dev/null +++ b/arch/x86/include/asm/audit.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AUDIT_H +#define _ASM_X86_AUDIT_H + +int ia32_classify_syscall(unsigned int syscall); + +#endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 0367efdc5b7a..35460fef39b8 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,8 +201,12 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr) return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } -static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { + /* + * Because this is a plain access, we need to disable KCSAN here to + * avoid double instrumentation via instrumented bitops. + */ return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index facba9bc30ca..fb34ff641e0a 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -70,14 +70,17 @@ do { \ #define HAVE_ARCH_BUG #define BUG() \ do { \ + instrumentation_begin(); \ _BUG_FLAGS(ASM_UD2, 0); \ unreachable(); \ } while (0) #define __WARN_FLAGS(flags) \ do { \ + instrumentation_begin(); \ _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ annotate_reachable(); \ + instrumentation_end(); \ } while (0) #include <asm-generic/bug.h> diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 63feaf2a5f93..b192d917a6d0 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CACHEFLUSH_H #define _ASM_X86_CACHEFLUSH_H +#include <linux/mm.h> + /* Caches aren't brain-dead on the intel. 
*/ #include <asm-generic/cacheflush.h> #include <asm/special_insns.h> diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h index d79d1e622dcf..0ada98d5d09f 100644 --- a/arch/x86/include/asm/checksum.h +++ b/arch/x86/include/asm/checksum.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +#define HAVE_CSUM_COPY_USER #ifdef CONFIG_X86_32 # include <asm/checksum_32.h> #else diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index f57b94e02c57..11624c8a9d8d 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -44,18 +44,21 @@ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); } -static inline __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, - int len, __wsum sum, - int *err_ptr) +static inline __wsum csum_and_copy_from_user(const void __user *src, + void *dst, int len, + __wsum sum, int *err_ptr) { __wsum ret; might_sleep(); - stac(); + if (!user_access_begin(src, len)) { + if (len) + *err_ptr = -EFAULT; + return sum; + } ret = csum_partial_copy_generic((__force void *)src, dst, len, sum, err_ptr, NULL); - clac(); + user_access_end(); return ret; } @@ -173,7 +176,6 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, /* * Copy and checksum to user */ -#define HAVE_CSUM_COPY_USER static inline __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum sum, @@ -182,11 +184,10 @@ static inline __wsum csum_and_copy_to_user(const void *src, __wsum ret; might_sleep(); - if (access_ok(dst, len)) { - stac(); + if (user_access_begin(dst, len)) { ret = csum_partial_copy_generic(src, (__force void *)dst, len, sum, NULL, err_ptr); - clac(); + user_access_end(); return ret; } diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 3ec6d3267cf9..0a289b87e872 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -129,27 +129,19 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 -#define HAVE_CSUM_COPY_USER 1 - - /* Do not call this directly. Use the wrappers below */ extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst, int len, __wsum sum, int *src_err_ptr, int *dst_err_ptr); -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp); extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); -/* Old names. To be removed. */ -#define csum_and_copy_to_user csum_partial_copy_to_user -#define csum_and_copy_from_user csum_partial_copy_from_user - /** * ip_compute_csum - Compute an 16bit IP checksum. * @buff: buffer address. 
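The checksum_32.h hunk above swaps the open-coded stac()/clac() pair for the user_access_begin()/user_access_end() interface, which both validates the user range and brackets the access region. A minimal sketch of the resulting shape, assuming a hypothetical copy_words() worker standing in for csum_partial_copy_generic() (sketch_csum_and_copy_from_user is illustrative, not the header's actual helper):

#include <linux/types.h>
#include <linux/uaccess.h>	/* user_access_begin(), user_access_end() */

/* Hypothetical worker standing in for csum_partial_copy_generic(). */
extern __wsum copy_words(const void *src, void *dst, int len,
			 __wsum sum, int *err_ptr);

static __wsum sketch_csum_and_copy_from_user(const void __user *src, void *dst,
					      int len, __wsum sum, int *err_ptr)
{
	__wsum ret;

	might_sleep();
	if (!user_access_begin(src, len)) {	/* range check plus STAC */
		if (len)
			*err_ptr = -EFAULT;
		return sum;			/* caller gets the seed back */
	}
	ret = copy_words((__force void *)src, dst, len, sum, err_ptr);
	user_access_end();			/* CLAC */
	return ret;
}

The same pattern is applied to csum_and_copy_to_user() in the hunk above, with the destination range passed to user_access_begin() instead.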
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 52e9f3480f69..d4edf281fff4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -214,7 +214,11 @@ static inline bool in_compat_syscall(void) #endif struct compat_siginfo; -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const kernel_siginfo_t *from, bool x32_ABI); + +#ifdef CONFIG_X86_X32_ABI +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#define copy_siginfo_to_user32 copy_siginfo_to_user32 +#endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index cf3d621c6892..eb8fcede9e3b 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -20,12 +20,14 @@ #define X86_CENTAUR_FAM6_C7_D 0xd #define X86_CENTAUR_FAM6_NANO 0xf +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@_vendor * @_family: The family number or X86_FAMILY_ANY * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY * @_data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer @@ -37,16 +39,35 @@ * into another macro at the usage site for good reasons, then please * start this local macro with X86_MATCH to allow easy grepping. */ -#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \ - _feature, _data) { \ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ .vendor = X86_VENDOR_##_vendor, \ .family = _family, \ .model = _model, \ + .steppings = _steppings, \ .feature = _feature, \ .driver_data = (unsigned long) _data \ } /** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ + X86_STEPPING_ANY, feature, data) + +/** * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@vendor @@ -139,6 +160,10 @@ #define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) +#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + steppings, X86_FEATURE_ANY, data) + /* * Match specific microcode revisions. 
* diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 02c0078d3787..8902fdb7de13 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -11,15 +11,11 @@ #ifdef CONFIG_X86_64 /* Macro to enforce the same ordering and stack sizes */ -#define ESTACKS_MEMBERS(guardsize, db2_holesize)\ +#define ESTACKS_MEMBERS(guardsize) \ char DF_stack_guard[guardsize]; \ char DF_stack[EXCEPTION_STKSZ]; \ char NMI_stack_guard[guardsize]; \ char NMI_stack[EXCEPTION_STKSZ]; \ - char DB2_stack_guard[guardsize]; \ - char DB2_stack[db2_holesize]; \ - char DB1_stack_guard[guardsize]; \ - char DB1_stack[EXCEPTION_STKSZ]; \ char DB_stack_guard[guardsize]; \ char DB_stack[EXCEPTION_STKSZ]; \ char MCE_stack_guard[guardsize]; \ @@ -28,12 +24,12 @@ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0, 0) + ESTACKS_MEMBERS(0) }; /* The effective cpu entry area mapping with guard pages. */ struct cea_exception_stacks { - ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) + ESTACKS_MEMBERS(PAGE_SIZE) }; /* @@ -42,8 +38,6 @@ struct cea_exception_stacks { enum exception_stack_ordering { ESTACK_DF, ESTACK_NMI, - ESTACK_DB2, - ESTACK_DB1, ESTACK_DB, ESTACK_MCE, N_EXCEPTION_STACKS diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index db189945e9b0..02dabc9e77b0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -362,6 +362,7 @@ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -407,5 +408,6 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 1a8609a15856..e89558a3fe4a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,7 +18,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); native_set_debugreg(register, value) #endif -static inline unsigned long native_get_debugreg(int regno) +static __always_inline unsigned long native_get_debugreg(int regno) { unsigned long val = 0; /* Damn you, gcc! 
*/ @@ -47,7 +47,7 @@ static inline unsigned long native_get_debugreg(int regno) return val; } -static inline void native_set_debugreg(int regno, unsigned long value) +static __always_inline void native_set_debugreg(int regno, unsigned long value) { switch (regno) { case 0: @@ -85,7 +85,7 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } -static inline int hw_breakpoint_active(void) +static __always_inline bool hw_breakpoint_active(void) { return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } @@ -94,24 +94,38 @@ extern void aout_dump_debugregs(struct user *dump); extern void hw_breakpoint_restore(void); -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(int, debug_stack_usage); -static inline void debug_stack_usage_inc(void) +static __always_inline unsigned long local_db_save(void) { - __this_cpu_inc(debug_stack_usage); + unsigned long dr7; + + if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active()) + return 0; + + get_debugreg(dr7, 7); + dr7 &= ~0x400; /* architecturally set bit */ + if (dr7) + set_debugreg(0, 7); + /* + * Ensure the compiler doesn't lower the above statements into + * the critical section; disabling breakpoints late would not + * be good. + */ + barrier(); + + return dr7; } -static inline void debug_stack_usage_dec(void) + +static __always_inline void local_db_restore(unsigned long dr7) { - __this_cpu_dec(debug_stack_usage); + /* + * Ensure the compiler doesn't raise this statement into + * the critical section; enabling breakpoints early would + * not be good. + */ + barrier(); + if (dr7) + set_debugreg(dr7, 7); } -void debug_stack_set_zero(void); -void debug_stack_reset(void); -#else /* !X86_64 */ -static inline void debug_stack_set_zero(void) { } -static inline void debug_stack_reset(void) { } -static inline void debug_stack_usage_inc(void) { } -static inline void debug_stack_usage_dec(void) { } -#endif /* X86_64 */ #ifdef CONFIG_CPU_SUP_AMD extern void set_dr_addr_mask(unsigned long mask, int dr); diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index de9e7841f953..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -3,8 +3,10 @@ #define _ASM_X86_DELAY_H #include <asm-generic/delay.h> +#include <linux/init.h> -void use_tsc_delay(void); +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 68a99d2a5f33..1ced11d31932 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -40,11 +40,6 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->l = 0; } -extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; -extern const struct desc_ptr debug_idt_descr; -extern gate_desc debug_idt_table[]; - struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); @@ -214,7 +209,7 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } -static inline void native_load_idt(const struct desc_ptr *dtr) +static __always_inline void native_load_idt(const struct desc_ptr *dtr) { asm volatile("lidt %0"::"m" (*dtr)); } @@ -386,64 +381,23 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit1 = (limit >> 16) & 0xf; } -void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long 
system_vectors[]; -#ifdef CONFIG_X86_64 -DECLARE_PER_CPU(u32, debug_idt_ctr); -static inline bool is_debug_idt_enabled(void) -{ - if (this_cpu_read(debug_idt_ctr)) - return true; - - return false; -} - -static inline void load_debug_idt(void) -{ - load_idt((const struct desc_ptr *)&debug_idt_descr); -} -#else -static inline bool is_debug_idt_enabled(void) -{ - return false; -} - -static inline void load_debug_idt(void) -{ -} -#endif - -/* - * The load_current_idt() must be called with interrupts disabled - * to avoid races. That way the IDT will always be set back to the expected - * descriptor. It's also called when a CPU is being initialized, and - * that doesn't need to disable interrupts, as nothing should be - * bothering the CPU then. - */ -static inline void load_current_idt(void) -{ - if (is_debug_idt_enabled()) - load_debug_idt(); - else - load_idt((const struct desc_ptr *)&idt_descr); -} - +extern void load_current_idt(void); extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); extern void idt_setup_apic_and_irq_gates(void); +extern bool idt_is_f00f_address(unsigned long address); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); extern void idt_setup_ist_traps(void); -extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } static inline void idt_setup_ist_traps(void) { } -static inline void idt_setup_debugidt_traps(void) { } #endif extern void idt_invalidate(void *addr); diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h index af9a14ac8962..54a6e4a2e132 100644 --- a/arch/x86/include/asm/doublefault.h +++ b/arch/x86/include/asm/doublefault.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_DOUBLEFAULT_H #define _ASM_X86_DOUBLEFAULT_H -#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +#ifdef CONFIG_X86_32 extern void doublefault_init_cpu_tss(void); #else static inline void doublefault_init_cpu_tss(void) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8391c115c0ec..e7d2ccfdd507 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -3,12 +3,13 @@ #define _ASM_X86_EFI_H #include <asm/fpu/api.h> -#include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> #include <linux/build_bug.h> +#include <linux/kernel.h> +#include <linux/pgtable.h> extern unsigned long efi_fw_vendor, efi_config_table; @@ -225,14 +226,21 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, /* arch specific definitions used by the stub code */ -__attribute_const__ bool efi_is_64bit(void); +#ifdef CONFIG_EFI_MIXED + +#define ARCH_HAS_EFISTUB_WRAPPERS + +static inline bool efi_is_64bit(void) +{ + extern const bool efi_is64; + + return efi_is64; +} static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) return true; - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; return efi_is_64bit(); } @@ -286,6 +294,15 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_allocate_pool(type, size, buffer) \ ((type), (size), efi64_zero_upper(buffer)) +#define __efi64_argmap_create_event(type, tpl, f, c, event) \ + ((type), (tpl), (f), (c), efi64_zero_upper(event)) + +#define __efi64_argmap_set_timer(event, type, time) \ + ((event), (type), lower_32_bits(time), upper_32_bits(time)) + +#define __efi64_argmap_wait_for_event(num, event, index) \ + ((num), (event), efi64_zero_upper(index)) + 
#define __efi64_argmap_handle_protocol(handle, protocol, interface) \ ((handle), (protocol), efi64_zero_upper(interface)) @@ -307,6 +324,10 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) +/* Graphics Output Protocol */ +#define __efi64_argmap_query_mode(gop, mode, size, info) \ + ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro @@ -335,15 +356,26 @@ static inline u32 efi64_convert_status(efi_status_t status) #define efi_bs_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->boottime->func(__VA_ARGS__) \ - : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ - boottime), func, __VA_ARGS__)) + ? efi_system_table->boottime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + boottime), \ + func, __VA_ARGS__)) #define efi_rt_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->runtime->func(__VA_ARGS__) \ - : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ - runtime), func, __VA_ARGS__)) + ? efi_system_table->runtime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + runtime), \ + func, __VA_ARGS__)) + +#else /* CONFIG_EFI_MIXED */ + +static inline bool efi_is_64bit(void) +{ + return IS_ENABLED(CONFIG_X86_64); +} + +#endif /* CONFIG_EFI_MIXED */ extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310..452beed7892b 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -281,9 +281,29 @@ extern u32 elf_hwcap2; /* * An executable for which elf_read_implies_exec() returns TRUE will * have the READ_IMPLIES_EXEC personality flag set automatically. + * + * The decision process for determining the results are: + * + *        CPU: | lacks NX*  | has NX, ia32   | has NX, x86_64 | + * ELF:        |       |          |         | + * ---------------------|------------|------------------|----------------| + * missing PT_GNU_STACK | exec-all  | exec-all     | exec-none   | + * PT_GNU_STACK == RWX  | exec-stack | exec-stack    | exec-stack   | + * PT_GNU_STACK == RW  | exec-none  | exec-none     | exec-none    | + * + * exec-all : all PROT_READ user mappings are executable, except when + * backed by files on a noexec-filesystem. + * exec-none : only PROT_EXEC user mappings are executable. + * exec-stack: only the stack and PROT_EXEC user mappings are executable. 
+ * + * *this column has no architectural effect: NX markings are ignored by + * hardware, but may have behavioral effects when "wants X" collides with + * "cannot be X" constraints in memory permission flags, as in + * https://lkml.kernel.org/r/20190418055759.GA3155@mellanox.com + * */ #define elf_read_implies_exec(ex, executable_stack) \ - (executable_stack != EXSTACK_DISABLE_X) + (mmap_is_ia32() && executable_stack == EXSTACK_DEFAULT) struct task_struct; diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h deleted file mode 100644 index 416422762845..000000000000 --- a/arch/x86/include/asm/entry_arch.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file is designed to contain the BUILD_INTERRUPT specifications for - * all of the extra named interrupt vectors used by the architecture. - * Usually this is the Inter Process Interrupts (IPIs) - */ - -/* - * The following vectors are part of the Linux architecture, there - * is no hardware IRQ pin equivalent for them, they are triggered - * through the ICC by us (IPIs) - */ -#ifdef CONFIG_SMP -BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) -BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) -BUILD_INTERRUPT(irq_move_cleanup_interrupt, IRQ_MOVE_CLEANUP_VECTOR) -BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) -#endif - -#ifdef CONFIG_HAVE_KVM -BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) -BUILD_INTERRUPT(kvm_posted_intr_nested_ipi, POSTED_INTR_NESTED_VECTOR) -#endif - -/* - * every pentium local APIC has two 'local interrupts', with a - * soft-definable vector attached to both interrupts, one of - * which is a timer interrupt, the other one is error counter - * overflow. 
Linux uses the local APIC timer interrupt to get - * a much simpler SMP time architecture: - */ -#ifdef CONFIG_X86_LOCAL_APIC - -BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) -BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) -BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - -#ifdef CONFIG_IRQ_WORK -BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) -#endif - -#ifdef CONFIG_X86_THERMAL_VECTOR -BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_THRESHOLD -BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) -#endif - -#ifdef CONFIG_X86_MCE_AMD -BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) -#endif -#endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 28183ee3cc42..b9527a54db99 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -152,7 +152,6 @@ extern void reserve_top_address(unsigned long reserve); extern int fixmaps_set; extern pte_t *kmap_pte; -#define kmap_prot PAGE_KERNEL extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 7ec59edde154..d43717b423cb 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -31,8 +31,8 @@ #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount = virtual_dma_count, lptr = virtual_dma_addr; lcount; lcount--, lptr++) { - st = inb(virtual_dma_port + 4) & 0xa0; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port + 5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port + 5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port + 4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 44c48e34d799..42159f45bf9c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -31,7 +31,8 @@ extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct task_struct *dst, struct task_struct *src); -extern void fpu__clear(struct fpu *fpu); +extern void fpu__clear_user_states(struct fpu *fpu); +extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); @@ -92,7 +93,7 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave) * XRSTORS requires these bits set in xcomp_bv, or it will * 
trigger #GP: */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; } static inline void fpstate_init_fxstate(struct fxregs_state *fx) @@ -399,7 +400,10 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) u32 hmask = mask >> 32; int err; - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + if (static_cpu_has(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); return err; } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index c6136d79f8c0..422d8369012a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -21,19 +21,29 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) -/* Supervisor features */ -#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) - -/* All currently supported features */ -#define XCNTXT_MASK (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ - XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ - XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM | \ - XFEATURE_MASK_PKRU | \ - XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) +/* All currently supported user features */ +#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR) + +/* All currently supported supervisor features */ +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0) + +/* + * Unsupported supervisor features. When a supervisor feature in this mask is + * supported in the future, move it to the supported supervisor feature mask. + */ +#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT) + +/* All supervisor states including supported and unsupported states. 
*/ +#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ + XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -41,7 +51,18 @@ #define REX_PREFIX #endif -extern u64 xfeatures_mask; +extern u64 xfeatures_mask_all; + +static inline u64 xfeatures_mask_supervisor(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; +} + +static inline u64 xfeatures_mask_user(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; +} + extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, @@ -54,8 +75,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +void copy_supervisor_to_kernel(struct xregs_state *xsave); /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -extern int validate_xstate_header(const struct xstate_header *hdr); +int validate_user_xstate_header(const struct xstate_header *hdr); #endif diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a8059930056d..0f420b24e0fc 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -58,15 +58,6 @@ extern unsigned long highstart_pfn, highend_pfn; #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern void *kmap_high(struct page *page); -extern void kunmap_high(struct page *page); - -void *kmap(struct page *page); -void kunmap(struct page *page); - -void *kmap_atomic_prot(struct page *page, pgprot_t prot); -void *kmap_atomic(struct page *page); -void __kunmap_atomic(void *kvaddr); void *kmap_atomic_pfn(unsigned long pfn); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index f65cfb48cfdd..1721b1aadeb1 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -7,14 +7,4 @@ #define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE) -static inline int is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) { - return 0; -} - -static inline void arch_clear_hugepage_flags(struct page *page) -{ -} - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 4154bc5f6a4e..74c12437401e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -28,28 +28,6 @@ #include <asm/irq.h> #include <asm/sections.h> -/* Interrupt handlers registered during init_IRQ */ -extern asmlinkage void apic_timer_interrupt(void); -extern asmlinkage void x86_platform_ipi(void); -extern asmlinkage void kvm_posted_intr_ipi(void); -extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); -extern asmlinkage void kvm_posted_intr_nested_ipi(void); -extern asmlinkage void error_interrupt(void); -extern asmlinkage void irq_work_interrupt(void); -extern asmlinkage void uv_bau_message_intr1(void); - -extern asmlinkage void spurious_interrupt(void); -extern asmlinkage void thermal_interrupt(void); -extern asmlinkage void reschedule_interrupt(void); - -extern asmlinkage void irq_move_cleanup_interrupt(void); -extern asmlinkage void reboot_interrupt(void); -extern asmlinkage void 
threshold_interrupt(void); -extern asmlinkage void deferred_error_interrupt(void); - -extern asmlinkage void call_function_interrupt(void); -extern asmlinkage void call_function_single_interrupt(void); - #ifdef CONFIG_X86_LOCAL_APIC struct irq_data; struct pci_dev; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 29336574d0bc..7a4d2062385c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -11,17 +11,6 @@ #include <linux/types.h> #include <asm/page.h> - -/* - * While not explicitly listed in the TLFS, Hyper-V always runs with a page size - * of 4096. These definitions are used when communicating with Hyper-V using - * guest physical pages and guest physical page addresses, since the guest page - * size may not be 4096 on all architectures. - */ -#define HV_HYP_PAGE_SHIFT 12 -#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) -#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) - /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -39,78 +28,41 @@ #define HYPERV_CPUID_MAX 0x4000ffff /* - * Feature identification. EAX indicates which features are available - * to the partition based upon the current partition privileges. - * These are HYPERV_CPUID_FEATURES.EAX bits. + * Aliases for Group A features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits. */ -/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* - * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM - * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available - */ -#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) -/* - * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through - * HV_X64_MSR_STIMER3_COUNT) available - */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* - * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) - * are available - */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE BIT(7) -/* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. 
- */ -#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) -/* AccessTscInvariantControls privilege */ -#define HV_X64_ACCESS_TSC_INVARIANT BIT(15) +#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \ + HV_MSR_VP_RUNTIME_AVAILABLE +#define HV_X64_MSR_SYNIC_AVAILABLE \ + HV_MSR_SYNIC_AVAILABLE +#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \ + HV_MSR_APIC_ACCESS_AVAILABLE +#define HV_X64_MSR_HYPERCALL_AVAILABLE \ + HV_MSR_HYPERCALL_AVAILABLE +#define HV_X64_MSR_VP_INDEX_AVAILABLE \ + HV_MSR_VP_INDEX_AVAILABLE +#define HV_X64_MSR_RESET_AVAILABLE \ + HV_MSR_RESET_AVAILABLE +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \ + HV_MSR_GUEST_IDLE_AVAILABLE +#define HV_X64_ACCESS_FREQUENCY_MSRS \ + HV_ACCESS_FREQUENCY_MSRS +#define HV_X64_ACCESS_REENLIGHTENMENT \ + HV_ACCESS_REENLIGHTENMENT +#define HV_X64_ACCESS_TSC_INVARIANT \ + HV_ACCESS_TSC_INVARIANT /* - * Feature identification: indicates which flags were specified at partition - * creation. The format is the same as the partition creation flag structure - * defined in section Partition Creation Flags. - * These are HYPERV_CPUID_FEATURES.EBX bits. + * Aliases for Group B features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits. */ -#define HV_X64_CREATE_PARTITIONS BIT(0) -#define HV_X64_ACCESS_PARTITION_ID BIT(1) -#define HV_X64_ACCESS_MEMORY_POOL BIT(2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_X64_POST_MESSAGES BIT(4) -#define HV_X64_SIGNAL_EVENTS BIT(5) -#define HV_X64_CREATE_PORT BIT(6) -#define HV_X64_CONNECT_PORT BIT(7) -#define HV_X64_ACCESS_STATS BIT(8) -#define HV_X64_DEBUGGING BIT(11) -#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) +#define HV_X64_POST_MESSAGES HV_POST_MESSAGES +#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS /* - * Feature identification. EDX indicates which miscellaneous features - * are available to the partition. - * These are HYPERV_CPUID_FEATURES.EDX bits. + * Group D Features. The bit assignments are custom to each architecture. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits. */ /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ #define HV_X64_MWAIT_AVAILABLE BIT(0) @@ -131,6 +83,8 @@ #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +/* Support for debug MSRs available */ +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) /* stimer Direct Mode is available */ #define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) @@ -187,7 +141,7 @@ * processor, except for virtual processors that are reported as sibling SMT * threads. */ -#define HV_X64_NO_NONARCH_CORESHARING BIT(18) +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ #define HV_X64_NESTED_DIRECT_FLUSH BIT(17) @@ -295,43 +249,6 @@ union hv_x64_msr_hypercall_contents { } __packed; }; -/* - * TSC page layout. - */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -} __packed; - -/* - * The guest OS needs to register the guest ID with the hypervisor. 
- * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; @@ -355,31 +272,12 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) -/* - * Crash notification (HV_X64_MSR_CRASH_CTL) flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) #define HV_IPI_LOW_VECTOR 0x10 #define HV_IPI_HIGH_VECTOR 0xff -/* Declare the various hypercall operations. */ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 - #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ @@ -391,75 +289,6 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 -#define HV_PROCESSOR_POWER_STATE_C0 0 -#define HV_PROCESSOR_POWER_STATE_C1 1 -#define HV_PROCESSOR_POWER_STATE_C2 2 -#define HV_PROCESSOR_POWER_STATE_C3 3 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_PARTITION_ID_SELF ((u64)-1) - -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * The Hyper-V TimeRefCount register and the TSC - * page provide a guest VM clock with 100ns 
tick rate - */ -#define HV_CLOCK_HZ (NSEC_PER_SEC/100) - -typedef struct _HV_REFERENCE_TSC_PAGE { - __u32 tsc_sequence; - __u32 res1; - __u64 tsc_scale; - __s64 tsc_offset; -} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) /* Define hypervisor message types. */ enum hv_message_type { @@ -470,76 +299,25 @@ enum hv_message_type { HVMSG_GPA_INTERCEPT = 0x80000001, /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, + HVMSG_TIMER_EXPIRED = 0x80000010, /* Error messages. */ HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, /* Trace buffer complete messages. */ HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - struct hv_nested_enlightenments_control { struct { __u32 directhypercall:1; @@ -767,187 +545,11 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. 
- */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 polling:1; - u64 reserved2:45; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. - */ -#define HV_MAX_FLUSH_PAGES (2048) - -/* HvFlushGuestPhysicalAddressList hypercall */ -union hv_gpa_page_range { - u64 address_space; - struct { - u64 additional_pages:11; - u64 largepage:1; - u64 basepfn:52; - } page; -}; - -/* - * All input flush parameters should be in single page. The max flush - * count is equal with how many entries of union hv_gpa_page_range can - * be populated into the input parameter page. 
- */ -#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ - sizeof(union hv_gpa_page_range)) - -struct hv_guest_mapping_flush_list { - u64 address_space; - u64 flags; - union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; -}; - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - struct hv_partition_assist_pg { u32 tlb_lock_count; }; -union hv_msi_entry { - u64 as_uint64; - struct { - u32 address; - u32 data; - } __packed; -}; - -struct hv_interrupt_entry { - u32 source; /* 1 for MSI(-X) */ - u32 reserved1; - union hv_msi_entry msi_entry; -} __packed; -/* - * flags for hv_device_interrupt_target.flags - */ -#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 -#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 - -struct hv_device_interrupt_target { - u32 vector; - u32 flags; - union { - u64 vp_mask; - struct hv_vpset vp_set; - }; -} __packed; +#include <asm-generic/hyperv-tlfs.h> -/* HvRetargetDeviceInterrupt hypercall */ -struct hv_retarget_device_interrupt { - u64 partition_id; /* use "self" */ - u64 device_id; - struct hv_interrupt_entry int_entry; - u64 reserved2; - struct hv_device_interrupt_target int_target; -} __packed __aligned(8); #endif diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h new file mode 100644 index 000000000000..cf51c50eb356 --- /dev/null +++ b/arch/x86/include/asm/idtentry.h @@ -0,0 +1,652 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IDTENTRY_H +#define _ASM_X86_IDTENTRY_H + +/* Interrupts/Exceptions */ +#include <asm/trapnr.h> + +#ifndef __ASSEMBLY__ +#include <linux/hardirq.h> + +#include <asm/irq_stack.h> + +void idtentry_enter_user(struct pt_regs *regs); +void idtentry_exit_user(struct pt_regs *regs); + +bool idtentry_enter_cond_rcu(struct pt_regs *regs); +void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit); + +/** + * DECLARE_IDTENTRY - Declare functions for simple IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Note: This is the C variant of DECLARE_IDTENTRY(). As the name says it + * declares the entry points for usage in C code. There is an ASM variant + * as well which is used to emit the entry stubs in entry_32/64.S. + */ +#define DECLARE_IDTENTRY(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY - Emit code for simple IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * idtentry_enter() contains common code which has to be invoked before + * arbitrary code in the body. idtentry_exit() contains common code + * which has to run before returning to the low level assembly code. 
+ */ +#define DEFINE_IDTENTRY(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + __##func (regs); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/* Special case for 32bit IRET 'trap' */ +#define DECLARE_IDTENTRY_SW DECLARE_IDTENTRY +#define DEFINE_IDTENTRY_SW DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_ERRORCODE - Declare functions for simple IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Same as DECLARE_IDTENTRY, but has an extra error_code argument for the + * C-handler. + */ +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + asmlinkage void asm_##func(void); \ + asmlinkage void xen_asm_##func(void); \ + __visible void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DEFINE_IDTENTRY_ERRORCODE - Emit code for simple IDT entry points + * Error code pushed by hardware + * @func: Function name of the entry point + * + * Same as DEFINE_IDTENTRY, but has an extra error_code argument + */ +#define DEFINE_IDTENTRY_ERRORCODE(func) \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + __##func (regs, error_code); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, \ + unsigned long error_code) + +/** + * DECLARE_IDTENTRY_RAW - Declare functions for raw IDT entry points + * No error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_RAW - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. + * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW(func) \ +__visible noinstr void func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_RAW_ERRORCODE - Declare functions for raw IDT entry points + * Error code pushed by hardware + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_RAW_ERRORCODE - Emit code for raw IDT entry points + * @func: Function name of the entry point + * + * @func is called from ASM entry code with interrupts disabled. 
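For orientation, a usage sketch of the plain DECLARE/DEFINE pair introduced above, modelled on how the x86 trap handlers use it; the handler body shown is illustrative, not part of this patch:

/* In this header, once per vector: */
DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow);

/* In C code; the macro emits the noinstr wrapper that brackets this body
 * with idtentry_enter_cond_rcu()/idtentry_exit_cond_rcu():
 */
DEFINE_IDTENTRY(exc_overflow)
{
	do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
}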
+ * + * The macro is written so it acts as function definition. Append the + * body with a pair of curly brackets. + * + * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the + * idtentry_enter/exit() helpers before and after the body invocation. This + * needs to be done in the body itself if applicable. Use if extra work + * is required before the enter/exit() helpers are invoked. + */ +#define DEFINE_IDTENTRY_RAW_ERRORCODE(func) \ +__visible noinstr void func(struct pt_regs *regs, unsigned long error_code) + +/** + * DECLARE_IDTENTRY_IRQ - Declare functions for device interrupt IDT entry + * points (common/spurious) + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_ERRORCODE() + */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_IRQ - Emit code for device interrupt IDT entry points + * @func: Function name of the entry point + * + * The vector number is pushed by the low level entry stub and handed + * to the function as error_code argument which needs to be truncated + * to an u8 because the push is sign extending. + * + * On 64-bit idtentry_enter/exit() are invoked in the ASM entry code before + * and after switching to the interrupt stack. On 32-bit this happens in C. + * + * irq_enter/exit_rcu() are invoked before the function body and the + * KVM L1D flush request is set. + */ +#define DEFINE_IDTENTRY_IRQ(func) \ +static __always_inline void __##func(struct pt_regs *regs, u8 vector); \ + \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + irq_enter_rcu(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs, (u8)error_code); \ + irq_exit_rcu(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs, u8 vector) + +/** + * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). + */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/** + * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points + * @func: Function name of the entry point + * + * idtentry_enter/exit() and irq_enter/exit_rcu() are invoked before the + * function body. KVM L1D flush request is set. + * + * Runs the function on the interrupt stack if the entry hit kernel mode + */ +#define DEFINE_IDTENTRY_SYSVEC(func) \ +static void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + irq_enter_rcu(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + run_on_irqstack_cond(__##func, regs, regs); \ + irq_exit_rcu(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static noinline void __##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_SYSVEC_SIMPLE - Emit code for simple system vector IDT + * entry points + * @func: Function name of the entry point + * + * Runs the function on the interrupted stack. 
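A sketch of what a handler built on DEFINE_IDTENTRY_SYSVEC above looks like, loosely modelled on the local APIC timer vector; simplified and illustrative, not part of this patch:

DEFINE_IDTENTRY_SYSVEC(sysvec_apic_timer_interrupt)
{
	struct pt_regs *old_regs = set_irq_regs(regs);

	/* The wrapper already did idtentry_enter_cond_rcu(), irq_enter_rcu(),
	 * the KVM L1D flush request and the switch to the IRQ stack.
	 */
	ack_APIC_irq();
	local_apic_timer_interrupt();

	set_irq_regs(old_regs);
}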
No switch to IRQ stack and + * only the minimal __irq_enter/exit() handling. + * + * Only use for 'empty' vectors like reschedule IPI and KVM posted + * interrupt vectors. + */ +#define DEFINE_IDTENTRY_SYSVEC_SIMPLE(func) \ +static __always_inline void __##func(struct pt_regs *regs); \ + \ +__visible noinstr void func(struct pt_regs *regs) \ +{ \ + bool rcu_exit = idtentry_enter_cond_rcu(regs); \ + \ + instrumentation_begin(); \ + __irq_enter_raw(); \ + kvm_set_cpu_l1tf_flush_l1d(); \ + __##func (regs); \ + __irq_exit_raw(); \ + instrumentation_end(); \ + idtentry_exit_cond_rcu(regs, rcu_exit); \ +} \ + \ +static __always_inline void __##func(struct pt_regs *regs) + +/** + * DECLARE_IDTENTRY_XENCB - Declare functions for XEN HV callback entry point + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares three functions: + * - The ASM entry point: asm_##func + * - The XEN PV trap entry point: xen_##func (maybe unused) + * - The C handler called from the ASM entry point + * + * Maps to DECLARE_IDTENTRY(). Distinct entry point to handle the 32/64-bit + * difference + */ +#define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#ifdef CONFIG_X86_64 +/** + * DECLARE_IDTENTRY_IST - Declare functions for IST handling IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW, but declares also the NOIST C handler + * which is called from the ASM entry point on user mode entry + */ +#define DECLARE_IDTENTRY_IST(vector, func) \ + DECLARE_IDTENTRY_RAW(vector, func); \ + __visible void noist_##func(struct pt_regs *regs) + +/** + * DEFINE_IDTENTRY_IST - Emit code for IST entry points + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW + */ +#define DEFINE_IDTENTRY_IST(func) \ + DEFINE_IDTENTRY_RAW(func) + +/** + * DEFINE_IDTENTRY_NOIST - Emit code for NOIST entry points which + * belong to a IST entry point (MCE, DB) + * @func: Function name of the entry point. Must be the same as + * the function name of the corresponding IST variant + * + * Maps to DEFINE_IDTENTRY_RAW(). 
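Schematically, the IST/NOIST pairing described above is used like this; the #DB-style naming and the empty bodies are illustrative only (real handlers go through the MCE/DEBUG aliases defined further down):

/* Runs on the IST stack when the exception hits kernel mode. */
DEFINE_IDTENTRY_IST(exc_debug)
{
	/* ... kernel-mode handling ... */
}

/* Runs on the regular task stack when the exception hits user mode;
 * the ASM entry stub selects the variant, both share the IDT entry.
 */
DEFINE_IDTENTRY_NOIST(exc_debug)
{
	/* ... user-mode handling ... */
}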
+ */ +#define DEFINE_IDTENTRY_NOIST(func) \ + DEFINE_IDTENTRY_RAW(noist_##func) + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Maps to DECLARE_IDTENTRY_RAW_ERRORCODE + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault + * @func: Function name of the entry point + * + * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE + */ +#define DEFINE_IDTENTRY_DF(func) \ + DEFINE_IDTENTRY_RAW_ERRORCODE(func) + +#else /* CONFIG_X86_64 */ + +/* Maps to a regular IDTENTRY on 32bit for now */ +# define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY +# define DEFINE_IDTENTRY_IST DEFINE_IDTENTRY + +/** + * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Declares two functions: + * - The ASM entry point: asm_##func + * - The C handler called from the C shim + */ +#define DECLARE_IDTENTRY_DF(vector, func) \ + asmlinkage void asm_##func(void); \ + __visible void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +/** + * DEFINE_IDTENTRY_DF - Emit code for double fault on 32bit + * @func: Function name of the entry point + * + * This is called through the doublefault shim which already provides + * cr2 in the address argument. + */ +#define DEFINE_IDTENTRY_DF(func) \ +__visible noinstr void func(struct pt_regs *regs, \ + unsigned long error_code, \ + unsigned long address) + +#endif /* !CONFIG_X86_64 */ + +/* C-Code mapping */ +#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST + +#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW +#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW + +#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST +#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST + +/** + * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points + * @vector: Vector number (ignored for C) + * @func: Function name of the entry point + * + * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM + * indirection magic. + */ +#define DECLARE_IDTENTRY_XEN(vector, func) \ + asmlinkage void xen_asm_exc_xen##func(void); \ + asmlinkage void asm_exc_xen##func(void) + +#else /* !__ASSEMBLY__ */ + +/* + * The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs. + */ +#define DECLARE_IDTENTRY(vector, func) \ + idtentry vector asm_##func func has_error_code=0 + +#define DECLARE_IDTENTRY_ERRORCODE(vector, func) \ + idtentry vector asm_##func func has_error_code=1 + +/* Special case for 32bit IRET 'trap'. 
Do not emit ASM code */ +#define DECLARE_IDTENTRY_SW(vector, func) + +#define DECLARE_IDTENTRY_RAW(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#define DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func) \ + DECLARE_IDTENTRY_ERRORCODE(vector, func) + +/* Entries for common/spurious (device) interrupts */ +#define DECLARE_IDTENTRY_IRQ(vector, func) \ + idtentry_irq vector func + +/* System vector entries */ +#define DECLARE_IDTENTRY_SYSVEC(vector, func) \ + idtentry_sysvec vector func + +#ifdef CONFIG_X86_64 +# define DECLARE_IDTENTRY_MCE(vector, func) \ + idtentry_mce_db vector asm_##func func + +# define DECLARE_IDTENTRY_DEBUG(vector, func) \ + idtentry_mce_db vector asm_##func func + +# define DECLARE_IDTENTRY_DF(vector, func) \ + idtentry_df vector asm_##func func + +# define DECLARE_IDTENTRY_XENCB(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +#else +# define DECLARE_IDTENTRY_MCE(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +# define DECLARE_IDTENTRY_DEBUG(vector, func) \ + DECLARE_IDTENTRY(vector, func) + +/* No ASM emitted for DF as this goes through a C shim */ +# define DECLARE_IDTENTRY_DF(vector, func) + +/* No ASM emitted for XEN hypervisor callback */ +# define DECLARE_IDTENTRY_XENCB(vector, func) + +#endif + +/* No ASM code emitted for NMI */ +#define DECLARE_IDTENTRY_NMI(vector, func) + +/* XEN NMI and DB wrapper */ +#define DECLARE_IDTENTRY_XEN(vector, func) \ + idtentry vector asm_exc_xen##func exc_##func has_error_code=0 + +/* + * ASM code to emit the common vector entry stubs where each stub is + * packed into 8 bytes. + * + * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because + * GCC treats the local vector variable as unsigned int and would expand + * all vectors above 0x7F to a 5 byte push. The original code did an + * adjustment of the vector number to be in the signed byte range to avoid + * this. While clever it's mindboggling counterintuitive and requires the + * odd conversion back to a real vector number in the C entry points. Using + * .byte achieves the same thing and the only fixup needed in the C entry + * point is to mask off the bits above bit 7 because the push is sign + * extending. + */ + .align 8 +SYM_CODE_START(irq_entries_start) + vector=FIRST_EXTERNAL_VECTOR + pos = . + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + UNWIND_HINT_IRET_REGS + .byte 0x6a, vector + jmp asm_common_interrupt + nop + /* Ensure that the above is 8 bytes max */ + . = pos + 8 + pos=pos+8 + vector=vector+1 + .endr +SYM_CODE_END(irq_entries_start) + +#ifdef CONFIG_X86_LOCAL_APIC + .align 8 +SYM_CODE_START(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + pos = . + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + UNWIND_HINT_IRET_REGS + .byte 0x6a, vector + jmp asm_spurious_interrupt + nop + /* Ensure that the above is 8 bytes max */ + . = pos + 8 + pos=pos+8 + vector=vector+1 + .endr +SYM_CODE_END(spurious_entries_start) +#endif + +#endif /* __ASSEMBLY__ */ + +/* + * The actual entry points. Note that DECLARE_IDTENTRY*() serves two + * purposes: + * - provide the function declarations when included from C-Code + * - emit the ASM stubs when included from entry_32/64.S + * + * This avoids duplicate defines and ensures that everything is consistent. + */ + +/* + * Dummy trap number so the low level ASM macro vector number checks do not + * match which results in emitting plain IDTENTRY stubs without bells and + * whistels. + */ +#define X86_TRAP_OTHER 0xFFFF + +/* Simple exception entry points. 
No hardware error code */ +DECLARE_IDTENTRY(X86_TRAP_DE, exc_divide_error); +DECLARE_IDTENTRY(X86_TRAP_OF, exc_overflow); +DECLARE_IDTENTRY(X86_TRAP_BR, exc_bounds); +DECLARE_IDTENTRY(X86_TRAP_NM, exc_device_not_available); +DECLARE_IDTENTRY(X86_TRAP_OLD_MF, exc_coproc_segment_overrun); +DECLARE_IDTENTRY(X86_TRAP_SPURIOUS, exc_spurious_interrupt_bug); +DECLARE_IDTENTRY(X86_TRAP_MF, exc_coprocessor_error); +DECLARE_IDTENTRY(X86_TRAP_XF, exc_simd_coprocessor_error); + +/* 32bit software IRET trap. Do not emit ASM code */ +DECLARE_IDTENTRY_SW(X86_TRAP_IRET, iret_error); + +/* Simple exception entries with error code pushed by hardware */ +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_TS, exc_invalid_tss); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_NP, exc_segment_not_present); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_SS, exc_stack_segment); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_GP, exc_general_protection); +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_AC, exc_alignment_check); + +/* Raw exception entries which need extra work */ +DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); +DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); +DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); + +#ifdef CONFIG_X86_MCE +DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); +#endif + +/* NMI */ +DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); +DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi); + +/* #DB */ +DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug); +DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug); + +/* #DF */ +DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); + +#ifdef CONFIG_XEN_PV +DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +#endif + +/* Device interrupts common/spurious */ +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, common_interrupt); +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_IRQ(X86_TRAP_OTHER, spurious_interrupt); +#endif + +/* System vector entry points */ +#ifdef CONFIG_X86_LOCAL_APIC +DECLARE_IDTENTRY_SYSVEC(ERROR_APIC_VECTOR, sysvec_error_interrupt); +DECLARE_IDTENTRY_SYSVEC(SPURIOUS_APIC_VECTOR, sysvec_spurious_apic_interrupt); +DECLARE_IDTENTRY_SYSVEC(LOCAL_TIMER_VECTOR, sysvec_apic_timer_interrupt); +DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); +#endif + +#ifdef CONFIG_SMP +DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); +DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); +DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); +DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +# ifdef CONFIG_X86_UV +DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE, sysvec_uv_bau_message); +# endif + +# ifdef CONFIG_X86_MCE_THRESHOLD +DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR, sysvec_threshold); +# endif + +# ifdef CONFIG_X86_MCE_AMD +DECLARE_IDTENTRY_SYSVEC(DEFERRED_ERROR_VECTOR, sysvec_deferred_error); +# endif + +# ifdef CONFIG_X86_THERMAL_VECTOR +DECLARE_IDTENTRY_SYSVEC(THERMAL_APIC_VECTOR, sysvec_thermal); +# endif + +# ifdef CONFIG_IRQ_WORK +DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work); +# endif +#endif + +#ifdef CONFIG_HAVE_KVM +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi); +DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi); +#endif + +#if IS_ENABLED(CONFIG_HYPERV) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback); 
+DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment); +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_STIMER0_VECTOR, sysvec_hyperv_stimer0); +#endif + +#if IS_ENABLED(CONFIG_ACRN_GUEST) +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_acrn_hv_callback); +#endif + +#ifdef CONFIG_XEN_PVHVM +DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); +#endif + +#undef X86_TRAP_OTHER + +#endif diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8f1e94f29a16..a338a6deb950 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -89,6 +89,8 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 8e5af119dc2d..de58391bdee0 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -88,11 +88,17 @@ static inline bool intel_mid_has_msic(void) return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL); } +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + #else /* !CONFIG_X86_INTEL_MID */ #define intel_mid_identify_cpu() 0 #define intel_mid_has_msic() 0 +static inline void intel_scu_devices_create(void) { } +static inline void intel_scu_devices_destroy(void) { } + #endif /* !CONFIG_X86_INTEL_MID */ enum intel_mid_timer_options { @@ -115,9 +121,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern void intel_scu_devices_create(void); -extern void intel_scu_devices_destroy(void); - /* VRTC timer */ #define MRST_VRTC_MAP_SZ 1024 /* #define MRST_VRTC_PGOFFSET 0xc00 */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h deleted file mode 100644 index e6da1ce26256..000000000000 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_PMC_IPC_H_ -#define _ASM_X86_INTEL_PMC_IPC_H_ - -/* Commands */ -#define PMC_IPC_PMIC_ACCESS 0xFF -#define PMC_IPC_PMIC_ACCESS_READ 0x0 -#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 -#define PMC_IPC_USB_PWR_CTRL 0xF0 -#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF -#define PMC_IPC_PHY_CONFIG 0xEE -#define PMC_IPC_NORTHPEAK_CTRL 0xED -#define PMC_IPC_PM_DEBUG 0xEC -#define PMC_IPC_PMC_TELEMTRY 0xEB -#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA - -/* IPC return code */ -#define IPC_ERR_NONE 0 -#define IPC_ERR_CMD_NOT_SUPPORTED 1 -#define IPC_ERR_CMD_NOT_SERVICED 2 -#define IPC_ERR_UNABLE_TO_SERVICE 3 -#define IPC_ERR_CMD_INVALID 4 -#define IPC_ERR_CMD_FAILED 5 -#define IPC_ERR_EMSECURITY 6 -#define IPC_ERR_UNSIGNEDKERNEL 7 - -/* GCR reg offsets from gcr base*/ -#define PMC_GCR_PMC_CFG_REG 0x08 -#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78 -#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80 - -#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) - -int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen); -int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read64(u32 offset, u64 *data); - -#else - -static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen) -{ - return -EINVAL; -} - -static inline int intel_pmc_s0ix_counter_read(u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read64(u32 
offset, u64 *data) -{ - return -EINVAL; -} - -#endif /*CONFIG_INTEL_PMC_IPC*/ - -#endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 2a1442ba6e78..11d457af68c5 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -2,61 +2,69 @@ #ifndef _ASM_X86_INTEL_SCU_IPC_H_ #define _ASM_X86_INTEL_SCU_IPC_H_ -#include <linux/notifier.h> - -#define IPCMSG_INDIRECT_READ 0x02 -#define IPCMSG_INDIRECT_WRITE 0x05 - -#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ - -#define IPCMSG_WARM_RESET 0xF0 -#define IPCMSG_COLD_RESET 0xF1 -#define IPCMSG_SOFT_RESET 0xF2 -#define IPCMSG_COLD_BOOT 0xF3 - -#define IPCMSG_VRTC 0xFA /* Set vRTC device */ - /* Command id associated with message IPCMSG_VRTC */ - #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ - #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ - -/* Read single register */ -int intel_scu_ipc_ioread8(u16 addr, u8 *data); - -/* Read a vector */ -int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); - -/* Write single register */ -int intel_scu_ipc_iowrite8(u16 addr, u8 data); - -/* Write a vector */ -int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); - -/* Update single register based on the mask */ -int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); - -/* Issue commands to the SCU with or without data */ -int intel_scu_ipc_simple_command(int cmd, int sub); -int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, - u32 *out, int outlen); - -extern struct blocking_notifier_head intel_scu_notifier; - -static inline void intel_scu_notifier_add(struct notifier_block *nb) -{ - blocking_notifier_chain_register(&intel_scu_notifier, nb); -} - -static inline void intel_scu_notifier_remove(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&intel_scu_notifier, nb); -} - -static inline int intel_scu_notifier_post(unsigned long v, void *p) +#include <linux/ioport.h> + +struct device; +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev 
*scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) { - return blocking_notifier_call_chain(&intel_scu_notifier, v, p); + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); } -#define SCU_AVAILABLE 1 -#define SCU_DOWN 2 +#include <asm/intel_scu_ipc_legacy.h> #endif diff --git a/arch/x86/include/asm/intel_scu_ipc_legacy.h b/arch/x86/include/asm/intel_scu_ipc_legacy.h new file mode 100644 index 000000000000..4cf13fecb673 --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc_legacy.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_SCU_IPC_LEGACY_H_ +#define _ASM_X86_INTEL_SCU_IPC_LEGACY_H_ + +#include <linux/notifier.h> + +#define IPCMSG_INDIRECT_READ 0x02 +#define IPCMSG_INDIRECT_WRITE 0x05 + +#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ + +#define IPCMSG_WARM_RESET 0xF0 +#define IPCMSG_COLD_RESET 0xF1 +#define IPCMSG_SOFT_RESET 0xF2 +#define IPCMSG_COLD_BOOT 0xF3 + +#define IPCMSG_VRTC 0xFA /* Set vRTC device */ +/* Command id associated with message IPCMSG_VRTC */ +#define IPC_CMD_VRTC_SETTIME 1 /* Set time */ +#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ + +/* Don't call these in new code - they will be removed eventually */ + +/* Read single register */ +static inline int intel_scu_ipc_ioread8(u16 addr, u8 *data) +{ + return intel_scu_ipc_dev_ioread8(NULL, addr, data); +} + +/* Read a vector */ +static inline int intel_scu_ipc_readv(u16 *addr, u8 *data, int len) +{ + return intel_scu_ipc_dev_readv(NULL, addr, data, len); +} + +/* Write single register */ +static inline int intel_scu_ipc_iowrite8(u16 addr, u8 data) +{ + return intel_scu_ipc_dev_iowrite8(NULL, addr, data); +} + +/* Write a vector */ +static inline int intel_scu_ipc_writev(u16 *addr, u8 *data, int len) +{ + return intel_scu_ipc_dev_writev(NULL, addr, data, len); +} + +/* Update single register based on the mask */ +static inline int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask) +{ + return intel_scu_ipc_dev_update(NULL, addr, data, mask); +} + +/* Issue commands to the SCU with or without data */ +static inline int intel_scu_ipc_simple_command(int cmd, int sub) +{ + return intel_scu_ipc_dev_simple_command(NULL, cmd, sub); +} + +static inline int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, + u32 *out, int outlen) +{ + /* New API takes both inlen and outlen as bytes so convert here */ + size_t inbytes = inlen * sizeof(u32); + size_t outbytes = outlen * sizeof(u32); + + return intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, inbytes, + inlen, out, outbytes); +} + +extern struct blocking_notifier_head intel_scu_notifier; + +static inline void intel_scu_notifier_add(struct notifier_block *nb) +{ + blocking_notifier_chain_register(&intel_scu_notifier, nb); +} + +static inline void intel_scu_notifier_remove(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&intel_scu_notifier, nb); +} + +static inline int intel_scu_notifier_post(unsigned long v, void *p) +{ + return blocking_notifier_call_chain(&intel_scu_notifier, v, p); +} + +#define SCU_AVAILABLE 1 +#define SCU_DOWN 2 + +#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 2f77e31a1283..8046e70dfd7c 
100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -10,6 +10,8 @@ #define TELEM_MAX_EVENTS_SRAM 28 #define TELEM_MAX_OS_ALLOCATED_EVENTS 20 +#include <asm/intel_scu_ipc.h> + enum telemetry_unit { TELEM_PSS = 0, TELEM_IOSS, @@ -51,6 +53,8 @@ struct telemetry_plt_config { struct telemetry_unit_config ioss_config; struct mutex telem_trace_lock; struct mutex telem_lock; + struct intel_pmc_dev *pmc; + struct intel_scu_ipc_dev *scu; bool telem_in_use; }; @@ -92,7 +96,7 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops, int telemetry_clear_pltdata(void); -int telemetry_pltconfig_valid(void); +struct telemetry_plt_config *telemetry_get_pltdata(void); int telemetry_get_evtname(enum telemetry_unit telem_unit, const char **name, int len); diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h index 989cfa86de85..734482afbf81 100644 --- a/arch/x86/include/asm/invpcid.h +++ b/arch/x86/include/asm/invpcid.h @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, * stale TLB entries and, especially if we're flushing global * mappings, we don't want the compiler to reorder any subsequent * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + asm volatile("invpcid %[desc], %[type]" + :: [desc] "m" (desc), [type] "r" (type) : "memory"); } #define INVPCID_TYPE_INDIV_ADDR 0 diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 2a7b3211ee7a..bacf68c4d70e 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> -#include <asm/pgtable.h> #include <asm/tlbflush.h> void __iomem * diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 72fba0eeeb30..528c8a71fe7f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -11,6 +11,13 @@ #include <asm/apicdef.h> #include <asm/irq_vectors.h> +/* + * The irq entry code is in the noinstr section and the start/end of + * __irqentry_text is emitted via labels. Make the build fail if + * something moves a C function into the __irq_entry section. + */ +#define __irq_entry __invalid_section + static inline int irq_canonicalize(int irq) { return ((irq == 2) ? 
9 : irq); @@ -26,17 +33,14 @@ extern void fixup_irqs(void); #ifdef CONFIG_HAVE_KVM extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); -extern __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs); -extern __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs); #endif extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs); +extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs); -extern __visible void do_IRQ(struct pt_regs *regs); +extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector); extern void init_ISA_irqs(void); @@ -46,7 +50,6 @@ extern void __init init_IRQ(void); void arch_trigger_cpumask_backtrace(const struct cpumask *mask, bool exclude_self); -extern __visible void smp_x86_platform_ipi(struct pt_regs *regs); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h deleted file mode 100644 index 187ce59aea28..000000000000 --- a/arch/x86/include/asm/irq_regs.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge <jeremy@goop.org> - */ -#ifndef _ASM_X86_IRQ_REGS_H -#define _ASM_X86_IRQ_REGS_H - -#include <asm/percpu.h> - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return __this_cpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - __this_cpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h new file mode 100644 index 000000000000..4ae66f097101 --- /dev/null +++ b/arch/x86/include/asm/irq_stack.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IRQ_STACK_H +#define _ASM_X86_IRQ_STACK_H + +#include <linux/ptrace.h> + +#include <asm/processor.h> + +#ifdef CONFIG_X86_64 +static __always_inline bool irqstack_active(void) +{ + return __this_cpu_read(irq_count) != -1; +} + +void asm_call_on_stack(void *sp, void *func, void *arg); + +static __always_inline void __run_on_irqstack(void *func, void *arg) +{ + void *tos = __this_cpu_read(hardirq_stack_ptr); + + __this_cpu_add(irq_count, 1); + asm_call_on_stack(tos - 8, func, arg); + __this_cpu_sub(irq_count, 1); +} + +#else /* CONFIG_X86_64 */ +static inline bool irqstack_active(void) { return false; } +static inline void __run_on_irqstack(void *func, void *arg) { } +#endif /* !CONFIG_X86_64 */ + +static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return false; + if (!regs) + return !irqstack_active(); + return !user_mode(regs) && !irqstack_active(); +} + +static __always_inline void run_on_irqstack_cond(void *func, void *arg, + struct pt_regs *regs) +{ + void (*__func)(void *arg) = func; + + lockdep_assert_irqs_disabled(); + + if (irq_needs_irq_stack(regs)) + __run_on_irqstack(__func, arg); + else + __func(arg); +} + +#endif diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h index 80b35e3adf03..800ffce0db29 100644 
--- a/arch/x86/include/asm/irq_work.h +++ b/arch/x86/include/asm/irq_work.h @@ -10,7 +10,6 @@ static inline bool arch_irq_work_has_interrupt(void) return boot_cpu_has(X86_FEATURE_APIC); } extern void arch_irq_work_raise(void); -extern __visible void smp_irq_work_interrupt(struct pt_regs *regs); #else static inline bool arch_irq_work_has_interrupt(void) { diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 8a0e56e1dcc9..02a0cf547d7b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -17,7 +17,7 @@ /* Declaration required for gcc < 4.9 to prevent -Werror=missing-prototypes */ extern inline unsigned long native_save_fl(void); -extern inline unsigned long native_save_fl(void) +extern __always_inline unsigned long native_save_fl(void) { unsigned long flags; @@ -44,12 +44,12 @@ extern inline void native_restore_fl(unsigned long flags) :"memory", "cc"); } -static inline void native_irq_disable(void) +static __always_inline void native_irq_disable(void) { asm volatile("cli": : :"memory"); } -static inline void native_irq_enable(void) +static __always_inline void native_irq_enable(void) { asm volatile("sti": : :"memory"); } @@ -74,22 +74,22 @@ static inline __cpuidle void native_halt(void) #ifndef __ASSEMBLY__ #include <linux/types.h> -static inline notrace unsigned long arch_local_save_flags(void) +static __always_inline unsigned long arch_local_save_flags(void) { return native_save_fl(); } -static inline notrace void arch_local_irq_restore(unsigned long flags) +static __always_inline void arch_local_irq_restore(unsigned long flags) { native_restore_fl(flags); } -static inline notrace void arch_local_irq_disable(void) +static __always_inline void arch_local_irq_disable(void) { native_irq_disable(); } -static inline notrace void arch_local_irq_enable(void) +static __always_inline void arch_local_irq_enable(void) { native_irq_enable(); } @@ -115,7 +115,7 @@ static inline __cpuidle void halt(void) /* * For spinlocks, etc: */ -static inline notrace unsigned long arch_local_irq_save(void) +static __always_inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); arch_local_irq_disable(); @@ -159,12 +159,12 @@ static inline notrace unsigned long arch_local_irq_save(void) #endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ -static inline int arch_irqs_disabled_flags(unsigned long flags) +static __always_inline int arch_irqs_disabled_flags(unsigned long flags) { return !(flags & X86_EFLAGS_IF); } -static inline int arch_irqs_disabled(void) +static __always_inline int arch_irqs_disabled(void) { unsigned long flags = arch_local_save_flags(); @@ -172,38 +172,4 @@ static inline int arch_irqs_disabled(void) } #endif /* !__ASSEMBLY__ */ -#ifdef __ASSEMBLY__ -#ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# ifdef CONFIG_X86_64 -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - sti; \ - call lockdep_sys_exit_thunk; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -# define LOCKDEP_SYS_EXIT_IRQ -# endif -#else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -#endif -#endif /* __ASSEMBLY__ */ - #endif diff --git 
a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0a6b35353fc7..f8998e97457f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -83,6 +83,10 @@ #define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -107,15 +111,8 @@ #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -enum { - PT_PAGE_TABLE_LEVEL = 1, - PT_DIRECTORY_LEVEL = 2, - PT_PDPE_LEVEL = 3, - /* set max level to the biggest one */ - PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, -}; -#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ - PT_PAGE_TABLE_LEVEL + 1) +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) @@ -124,7 +121,7 @@ enum { static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) { - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. 
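To make the renamed level arithmetic above concrete (assuming the x86 enum pg_level values PG_LEVEL_4K=1, PG_LEVEL_2M=2, PG_LEVEL_1G=3), a hypothetical compile-time check, not part of the patch:

static_assert(KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) == 0);
static_assert(KVM_HPAGE_SIZE(PG_LEVEL_2M) == SZ_2M);	/* shift 9  -> 2 MiB */
static_assert(KVM_HPAGE_SIZE(PG_LEVEL_1G) == SZ_1G);	/* shift 18 -> 1 GiB */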
*/ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } @@ -164,9 +161,13 @@ enum kvm_reg { NR_VCPU_REGS, VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, VCPU_EXREG_CR3, + VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, }; enum { @@ -182,8 +183,10 @@ enum { enum exit_fastpath_completion { EXIT_FASTPATH_NONE, - EXIT_FASTPATH_SKIP_EMUL_INS, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, }; +typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; @@ -372,12 +375,12 @@ struct rsvd_bits_validate { }; struct kvm_mmu_root_info { - gpa_t cr3; + gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ - ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 @@ -403,7 +406,7 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - gpa_t root_cr3; + gpa_t root_pgd; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; @@ -598,6 +601,7 @@ struct kvm_vcpu_arch { u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; + u64 perf_capabilities; /* * Paging state of the vcpu @@ -650,7 +654,6 @@ struct kvm_vcpu_arch { u64 xcr0; u64 guest_supported_xcr0; - u32 guest_xstate_size; struct kvm_pio_request pio; void *pio_data; @@ -680,6 +683,7 @@ struct kvm_vcpu_arch { struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; int maxphyaddr; + int tdp_level; /* emulate context */ @@ -703,6 +707,7 @@ struct kvm_vcpu_arch { struct gfn_to_pfn_cache cache; } st; + u64 l1_tsc_offset; u64 tsc_offset; u64 last_guest_tsc; u64 last_host_tsc; @@ -762,14 +767,17 @@ struct kvm_vcpu_arch { struct { bool halted; - gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; - u64 msr_val; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; u32 id; bool send_user_only; - u32 host_apf_reason; + u32 host_apf_flags; unsigned long nested_apf_token; bool delivery_as_pf_vmexit; + bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ @@ -855,6 +863,18 @@ struct kvm_apic_map { struct kvm_lapic *phys_map[]; }; +/* Hyper-V synthetic debugger (SynDbg)*/ +struct kvm_hv_syndbg { + struct { + u64 control; + u64 status; + u64 send_page; + u64 recv_page; + u64 pending_page; + } control; + u64 options; +}; + /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; @@ -866,7 +886,7 @@ struct kvm_hv { u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; - HV_REFERENCE_TSC_PAGE tsc_ref; + struct ms_hyperv_tsc_page tsc_ref; struct idr conn_to_evt; @@ -878,6 +898,7 @@ struct kvm_hv { atomic_t num_mismatched_vp_indexes; struct hv_partition_assist_pg *hv_pa_pg; + struct kvm_hv_syndbg hv_syndbg; }; enum kvm_irqchip_mode { @@ -1028,6 +1049,8 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; }; struct x86_instruction_info; @@ -1059,7 +1082,7 @@ struct kvm_x86_ops { void (*hardware_disable)(void); void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(int index); + bool (*has_emulated_msr)(u32 index); void (*cpuid_update)(struct kvm_vcpu *vcpu); unsigned int vm_size; @@ -1085,8 +1108,6 @@ struct kvm_x86_ops { void 
(*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -1100,7 +1121,8 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); + void (*tlb_flush_all)(struct kvm_vcpu *vcpu); + void (*tlb_flush_current)(struct kvm_vcpu *vcpu); int (*tlb_remote_flush)(struct kvm *kvm); int (*tlb_remote_flush_with_range)(struct kvm *kvm, struct kvm_tlb_range *range); @@ -1113,7 +1135,13 @@ struct kvm_x86_ops { */ void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); - void (*run)(struct kvm_vcpu *vcpu); + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. + */ + void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); + + enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); @@ -1126,8 +1154,8 @@ struct kvm_x86_ops { void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); - int (*interrupt_allowed)(struct kvm_vcpu *vcpu); - int (*nmi_allowed)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); @@ -1141,7 +1169,7 @@ struct kvm_x86_ops { bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); - void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -1153,7 +1181,6 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); /* Returns actual tsc_offset set in active VMCS */ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); @@ -1163,10 +1190,8 @@ struct kvm_x86_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage, struct x86_exception *exception); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1199,6 +1224,7 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + const struct kvm_x86_nested_ops *nested_ops; /* * Architecture specific hooks for vCPU blocking due to @@ -1226,18 +1252,10 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); - int (*get_nested_state)(struct kvm_vcpu *vcpu, - struct 
kvm_nested_state __user *user_kvm_nested_state, - unsigned user_data_size); - int (*set_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - struct kvm_nested_state *kvm_state); - bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); - - int (*smi_allowed)(struct kvm_vcpu *vcpu); + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); - int (*enable_smi_window)(struct kvm_vcpu *vcpu); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1245,14 +1263,28 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version); - uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); - bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); +}; + +struct kvm_x86_nested_ops { + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { @@ -1274,13 +1306,11 @@ struct kvm_arch_async_pf { extern u64 __read_mostly host_efer; extern struct kvm_x86_ops kvm_x86_ops; -extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return __vmalloc(kvm_x86_ops.vm_size, - GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); + return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm); @@ -1452,6 +1482,8 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); @@ -1479,6 +1511,8 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); +void kvm_update_dr7(struct kvm_vcpu *vcpu); + int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); @@ -1509,8 +1543,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa); void kvm_mmu_invpcid_gva(struct kvm_vcpu 
*vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, + bool skip_mmu_sync); void kvm_configure_mmu(bool enable_tdp, int tdp_page_level); @@ -1574,8 +1611,6 @@ enum { }; #define HF_GIF_MASK (1 << 0) -#define HF_HIF_MASK (1 << 1) -#define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ @@ -1635,13 +1670,14 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm); void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a..49d3a9edb06f 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,11 +88,21 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); +void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); -u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); +u32 kvm_read_and_reset_apf_flags(void); +void kvm_disable_steal_time(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); @@ -103,7 +113,7 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_async_pf_task_wait(T, I) do {} while(0) +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) @@ -121,7 +131,7 @@ static inline unsigned int kvm_arch_para_hints(void) return 0; } -static inline u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_apf_flags(void) { return 0; } @@ -130,6 +140,11 @@ static inline void kvm_disable_steal_time(void) { return; } + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + return false; +} #endif #endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f9cea081c05b..cf503824529c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -127,6 +127,17 @@ #define MSR_AMD64_SMCA_MCx_DEADDR(x) 
(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x)) #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x))) +#define XEC(x, mask) (((x) >> 16) & mask) + +/* mce.kflags flag bits for logging etc. */ +#define MCE_HANDLED_CEC BIT_ULL(0) +#define MCE_HANDLED_UC BIT_ULL(1) +#define MCE_HANDLED_EXTLOG BIT_ULL(2) +#define MCE_HANDLED_NFIT BIT_ULL(3) +#define MCE_HANDLED_EDAC BIT_ULL(4) +#define MCE_HANDLED_MCELOG BIT_ULL(5) +#define MCE_IN_KERNEL_RECOV BIT_ULL(6) + /* * This structure contains all data related to the MCE log. Also * carries a signature to make it easier to find from external @@ -142,14 +153,16 @@ struct mce_log_buffer { struct mce entry[]; }; +/* Highest last */ enum mce_notifier_prios { - MCE_PRIO_FIRST = INT_MAX, - MCE_PRIO_UC = INT_MAX - 1, - MCE_PRIO_EXTLOG = INT_MAX - 2, - MCE_PRIO_NFIT = INT_MAX - 3, - MCE_PRIO_EDAC = INT_MAX - 4, - MCE_PRIO_MCELOG = 1, - MCE_PRIO_LOWEST = 0, + MCE_PRIO_LOWEST, + MCE_PRIO_MCELOG, + MCE_PRIO_EDAC, + MCE_PRIO_NFIT, + MCE_PRIO_EXTLOG, + MCE_PRIO_UC, + MCE_PRIO_EARLY, + MCE_PRIO_CEC }; struct notifier_block; @@ -238,7 +251,7 @@ extern void mce_disable_bank(int bank); /* * Exception handler */ -void do_machine_check(struct pt_regs *, long); +void do_machine_check(struct pt_regs *pt_regs); /* * Threshold handler @@ -347,5 +360,4 @@ umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return #endif static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); } - #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h index 9c2447b3555d..9ca760e430b9 100644 --- a/arch/x86/include/asm/memtype.h +++ b/arch/x86/include/asm/memtype.h @@ -24,4 +24,7 @@ extern void memtype_free_io(resource_size_t start, resource_size_t end); extern bool pat_pfn_immune_to_uc_mtrr(unsigned long pfn); +bool x86_has_pat_wp(void); +enum page_cache_mode pgprot2cachemode(pgprot_t pgprot); + #endif /* _ASM_X86_MEMTYPE_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index bdeae9291e5c..0a301ad0b02f 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -45,7 +45,7 @@ typedef struct { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS /* * One bit per protection key says whether userspace can - * use it or not. protected by mmap_sem. + * use it or not. protected by mmap_lock. 
*/ u16 pkey_allocation_map; s16 execute_only_pkey; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4e55370e48e8..47562147e70b 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,21 +24,9 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS - DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); - -static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) -{ - if (static_branch_unlikely(&rdpmc_always_available_key) || - (!static_branch_unlikely(&rdpmc_never_available_key) && - atomic_read(&mm->context.perf_rdpmc_allowed))) - cr4_set_bits_irqsoff(X86_CR4_PCE); - else - cr4_clear_bits_irqsoff(X86_CR4_PCE); -} -#else -static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {} +void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL @@ -225,78 +213,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * This can be used from process context to figure out what the value of - * CR3 is without needing to do a (slow) __read_cr3(). - * - * It's intended to be used for code like KVM that sneakily changes CR3 - * and needs to restore it. It needs to be used very carefully. - */ -static inline unsigned long __get_current_cr3_fast(void) -{ - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, - this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* For now, be very restrictive about when this can be called. */ - VM_WARN_ON(in_nmi() || preemptible()); - - VM_BUG_ON(cr3 != __read_cr3()); - return cr3; -} - -typedef struct { - struct mm_struct *mm; -} temp_mm_state_t; - -/* - * Using a temporary mm allows to set temporary mappings that are not accessible - * by other CPUs. Such mappings are needed to perform sensitive memory writes - * that override the kernel memory protections (e.g., W^X), without exposing the - * temporary page-table mappings that are required for these write operations to - * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the - * mapping is torn down. - * - * Context: The temporary mm needs to be used exclusively by a single core. To - * harden security IRQs must be disabled while the temporary mm is - * loaded, thereby preventing interrupt handler bugs from overriding - * the kernel memory protection. - */ -static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) -{ - temp_mm_state_t temp_state; - - lockdep_assert_irqs_disabled(); - temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); - switch_mm_irqs_off(NULL, mm, current); - - /* - * If breakpoints are enabled, disable them while the temporary mm is - * used. Userspace might set up watchpoints on addresses that are used - * in the temporary mm, which would lead to wrong signals being sent or - * crashes. - * - * Note that breakpoints are not disabled selectively, which also causes - * kernel breakpoints (e.g., perf's) to be disabled. This might be - * undesirable, but still seems reasonable as the code that runs in the - * temporary mm should be short. 
- */ - if (hw_breakpoint_active()) - hw_breakpoint_disable(); - - return temp_state; -} - -static inline void unuse_temporary_mm(temp_mm_state_t prev_state) -{ - lockdep_assert_irqs_disabled(); - switch_mm_irqs_off(NULL, prev_state.mm, current); - - /* - * Restore the breakpoints if they were disabled before the temporary mm - * was loaded. - */ - if (hw_breakpoint_active()) - hw_breakpoint_restore(); -} +unsigned long __get_current_cr3_fast(void); #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 73d8dd14dda2..2d4515e8b7df 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -14,43 +14,4 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_DISCONTIGMEM - -/* - * generic node memory support, the following assumptions apply: - * - * 1) memory comes in 64Mb contiguous chunks which are either present or not - * 2) we will not have more than 64Gb in total - * - * for now assume that 64Gb is max amount of RAM for whole system - * 64Gb / 4096bytes/page = 16777216 pages - */ -#define MAX_NR_PAGES 16777216 -#define MAX_SECTIONS 1024 -#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) - -extern s8 physnode_map[]; - -static inline int pfn_to_nid(unsigned long pfn) -{ -#ifdef CONFIG_NUMA - return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); -#else - return 0; -#endif -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#define early_pfn_valid(pfn) pfn_valid((pfn)) - -#endif /* CONFIG_DISCONTIGMEM */ - #endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index d30805ed323e..60b944dd2df1 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -54,20 +54,8 @@ typedef int (*hyperv_fill_flush_list_func)( vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); #define hv_get_raw_timer() rdtsc_ordered() -void hyperv_callback_vector(void); -void hyperv_reenlightenment_vector(void); -#ifdef CONFIG_TRACING -#define trace_hyperv_callback_vector hyperv_callback_vector -#endif void hyperv_vector_handler(struct pt_regs *regs); -/* - * Routines for stimer0 Direct Mode handling. - * On x86/x64, there are no percpu actions to take. 
- */ -void hv_stimer0_vector_handler(struct pt_regs *regs); -void hv_stimer0_callback_vector(void); - static inline void hv_enable_stimer0_percpu_irq(int irq) {} static inline void hv_disable_stimer0_percpu_irq(int irq) {} @@ -226,7 +214,6 @@ void hyperv_setup_mmu_ops(void); void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); void hv_free_hyperv_page(unsigned long addr); -void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 12c9684d59ba..e8370e64a155 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -128,6 +128,10 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -301,6 +305,9 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b +#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 + /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index b809f117f3f4..73d997aa2966 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -20,8 +20,10 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX #define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 u32 get_umwait_control_msr(void); @@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_TPAUSE + asm volatile("tpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e9a281e2660..e7752b4038ff 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -4,20 +4,13 @@ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> +#include <linux/frame.h> #include <asm/alternative.h> #include <asm/alternative-asm.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> - -/* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. - */ -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - ANNOTATE_IGNORE_ALTERNATIVE +#include <asm/unwind_hints.h> /* * Fill the CPU return stack buffer. 
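A quick usage sketch for the __tpause() helper added to <asm/mwait.h> above (not part of this patch; the wrapper name is made up, and rdtsc(), lower_32_bits() and upper_32_bits() are assumed from <asm/msr.h> and <linux/kernel.h>): the deadline is an absolute TSC value split across EDX:EAX, and ECX selects the requested power state.

#include <linux/kernel.h>
#include <asm/msr.h>
#include <asm/mwait.h>

/* Wait roughly @cycles TSC cycles, requesting the deeper C0.2 state. */
static inline void tpause_delay_cycles(u64 cycles)
{
	u64 until = rdtsc() + cycles;

	/* The CPU may demote C0.2 to C0.1 via IA32_UMWAIT_CONTROL. */
	__tpause(TPAUSE_C02_STATE, upper_32_bits(until), lower_32_bits(until));
}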
@@ -46,21 +39,25 @@ #define __FILL_RETURN_BUFFER(reg, nr, sp) \ mov $(nr/2), reg; \ 771: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ 773: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 773b; \ 772: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 774f; \ 775: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 775b; \ 774: \ + add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; + jnz 771b; #ifdef __ASSEMBLY__ @@ -77,57 +74,27 @@ .endm /* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. - */ -.macro RETPOLINE_JMP reg:req - call .Ldo_rop_\@ -.Lspec_trap_\@: - pause - lfence - jmp .Lspec_trap_\@ -.Ldo_rop_\@: - mov \reg, (%_ASM_SP) - ret -.endm - -/* - * This is a wrapper around RETPOLINE_JMP so the called function in reg - * returns to the instruction after the macro. - */ -.macro RETPOLINE_CALL reg:req - jmp .Ldo_call_\@ -.Ldo_retpoline_jmp_\@: - RETPOLINE_JMP \reg -.Ldo_call_\@: - call .Ldo_retpoline_jmp_\@ -.endm - -/* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else - jmp *\reg + jmp *%\reg #endif .endm .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ + __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD #else - call *\reg + call *%\reg #endif .endm @@ -137,10 +104,8 @@ */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: #endif .endm @@ -161,16 +126,16 @@ * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ + "call __x86_retpoline_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_AMD) + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -180,7 +145,6 @@ * here, anyway. 
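For reference, the reworked CALL_NOSPEC/THUNK_TARGET pair above is used from C roughly as in this 64-bit-only sketch (not from this patch; the callee and argument are hypothetical): the macro expands to a plain indirect call, a call through the retpoline thunk, or an LFENCE-guarded call, depending on the feature bits.

#include <asm/asm.h>		/* ASM_CALL_CONSTRAINT */
#include <asm/nospec-branch.h>

static u64 indirect_call(u64 (*fn)(u64), u64 arg)
{
	u64 ret;

	/* Argument in RDI, return value in RAX, per the x86-64 calling convention. */
	asm volatile(CALL_NOSPEC
		     : "=a" (ret), ASM_CALL_CONSTRAINT, "+D" (arg)
		     : THUNK_TARGET(fn)
		     : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11",
		       "cc", "memory");
	return ret;
}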
*/ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ @@ -298,7 +262,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static inline void mds_clear_cpu_buffers(void) +static __always_inline void mds_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -319,7 +283,7 @@ static inline void mds_clear_cpu_buffers(void) * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_user_clear_cpu_buffers(void) +static __always_inline void mds_user_clear_cpu_buffers(void) { if (static_branch_likely(&mds_user_clear)) mds_clear_cpu_buffers(); diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 6e060907c163..d25534940bde 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -58,8 +58,7 @@ #define ORC_TYPE_CALL 0 #define ORC_TYPE_REGS 1 #define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 694d8daf4983..5ca5d297df75 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -47,7 +47,13 @@ static inline void slow_down_io(void) #endif } -static inline void __flush_tlb(void) +void native_flush_tlb_local(void); +void native_flush_tlb_global(void); +void native_flush_tlb_one_user(unsigned long addr); +void native_flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +static inline void __flush_tlb_local(void) { PVOP_VCALL0(mmu.flush_tlb_user); } @@ -62,8 +68,8 @@ static inline void __flush_tlb_one_user(unsigned long addr) PVOP_VCALL1(mmu.flush_tlb_one_user, addr); } -static inline void flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) +static inline void __flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) { PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); } diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 6deb6cd236e3..7f6ccff0ba72 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -20,6 +20,8 @@ typedef union { #define SHARED_KERNEL_PMD 0 +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED + /* * traditional i386 two-level paging structure: */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 5afb5e0fe903..e896ebef8c24 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -39,23 +39,23 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte) * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with * a "*pmdp" dereference done by GCC. Problem is, in certain places * where pte_offset_map_lock() is called, concurrent page faults are - * allowed, if the mmap_sem is hold for reading. An example is mincore + * allowed, if the mmap_lock is hold for reading. An example is mincore * vs page faults vs MADV_DONTNEED. On the page fault side * pmd_populate() rightfully does a set_64bit(), but if we're reading the * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen * because GCC will not read the 64-bit value of the pmd atomically. 
* * To fix this all places running pte_offset_map_lock() while holding the - * mmap_sem in read mode, shall read the pmdp pointer using this + * mmap_lock in read mode, shall read the pmdp pointer using this * function to know if the pmd is null or not, and in turn to know if * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd * operations. * - * Without THP if the mmap_sem is held for reading, the pmd can only + * Without THP if the mmap_lock is held for reading, the pmd can only * transition from null to not null while pmd_read_atomic() runs. So * we can always return atomic pmd values with this function. * - * With THP if the mmap_sem is held for reading, the pmd can become + * With THP if the mmap_lock is held for reading, the pmd can become * trans_huge or none or point to a pte (and in turn become "stable") * at any time under pmd_read_atomic(). We could read it truly * atomically here with an atomic64_read() for the THP enabled case (and diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 33845d36897c..80fbb4a9ed87 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -27,6 +27,8 @@ typedef union { #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) #endif +#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED) + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4d02e64af1b3..76aa21e8128d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -257,6 +257,7 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; @@ -624,7 +625,7 @@ static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) return __pud(pfn | check_pgprot(pgprot)); } -static inline pmd_t pmd_mknotpresent(pmd_t pmd) +static inline pmd_t pmd_mkinvalid(pmd_t pmd) { return pfn_pmd(pmd_pfn(pmd), __pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE))); @@ -801,7 +802,7 @@ static inline int pmd_present(pmd_t pmd) #ifdef CONFIG_NUMA_BALANCING /* * These work without NUMA balancing but the kernel does not care. See the - * comment in include/asm-generic/pgtable.h + * comment in include/linux/pgtable.h */ static inline int pte_protnone(pte_t pte) { @@ -836,17 +837,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd)) /* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -static inline unsigned long pmd_index(unsigned long address) -{ - return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); -} - -/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
* @@ -855,25 +845,6 @@ static inline unsigned long pmd_index(unsigned long address) */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this function returns the index of the entry in the pte page which would - * control the given virtual address - * - * Also define macro so we can test if pte_index is defined for arch. - */ -#define pte_index pte_index -static inline unsigned long pte_index(unsigned long address) -{ - return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -} - -static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) -{ - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); -} - static inline int pmd_bad(pmd_t pmd) { return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; @@ -906,12 +877,6 @@ static inline unsigned long pud_page_vaddr(pud_t pud) */ #define pud_page(pud) pfn_to_page(pud_pfn(pud)) -/* Find an entry in the second-level page table.. */ -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); -} - #define pud_leaf pud_large static inline int pud_large(pud_t pud) { @@ -931,11 +896,6 @@ static inline int pud_large(pud_t pud) } #endif /* CONFIG_PGTABLE_LEVELS > 2 */ -static inline unsigned long pud_index(unsigned long address) -{ - return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); -} - #if CONFIG_PGTABLE_LEVELS > 3 static inline int p4d_none(p4d_t p4d) { @@ -958,12 +918,6 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d) */ #define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d)) -/* Find an entry in the third-level page table.. */ -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); -} - static inline int p4d_bad(p4d_t p4d) { unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; @@ -1036,30 +990,6 @@ static inline int pgd_none(pgd_t pgd) #endif /* __ASSEMBLY__ */ -/* - * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] - * - * this macro returns the index of the entry in the pgd page which would - * control the given virtual address - */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) - -/* - * pgd_offset() returns a (pgd_t *) - * pgd_index() is used get the offset into the pgd page's array of pgd_t's; - */ -#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) -/* - * a shortcut to get a pgd_t in a given mm - */ -#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) -/* - * a shortcut which implies the use of the kernel's pgd, instead - * of a process's - */ -#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) - - #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) @@ -1070,27 +1000,14 @@ void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); + #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. 
*/ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); - -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ @@ -1545,7 +1462,6 @@ static inline bool arch_faults_on_old_pte(void) return false; } -#include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0dca7f7aeff2..d7acae4120d5 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,42 +32,23 @@ extern pmd_t initial_pg_pmd[]; void paging_init(void); void sync_initial_page_table(void); -/* - * Define this if things work differently on an i386 and an i486: - * it will (on an i486) warn about kernel memory accesses that are - * done without a 'access_ok( ..)' - */ -#undef TEST_ACCESS_OK - #ifdef CONFIG_X86_PAE # include <asm/pgtable-3level.h> #else # include <asm/pgtable-2level.h> #endif -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ - pte_index((address))) -#define pte_unmap(pte) kunmap_atomic((pte)) -#else -#define pte_offset_map(dir, address) \ - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) -#define pte_unmap(pte) do { } while (0) -#endif - /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ pte_clear(&init_mm, (vaddr), (ptep)); \ - __flush_tlb_one_kernel((vaddr)); \ + flush_tlb_one_kernel((vaddr)); \ } while (0) #endif /* !__ASSEMBLY__ */ /* - * kern_addr_valid() is (1) for FLATMEM and (0) for - * SPARSEMEM and DISCONTIGMEM + * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM */ #ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index df1373415f11..1b68d24dc6a0 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -53,6 +53,12 @@ static inline void sync_initial_page_table(void) { } struct mm_struct; +#define mm_p4d_folded mm_p4d_folded +static inline bool mm_p4d_folded(struct mm_struct *mm) +{ + return !pgtable_l5_enabled(); +} + void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); @@ -180,10 +186,6 @@ extern void sync_global_pgds(unsigned long start, unsigned long end); /* PTE - Level 1 access. */ -/* x86-64 always has all page tables mapped. */ -#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) -#define pte_unmap(pte) ((void)(pte))/* NOP */ - /* * Encode and de-code a swap entry * diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 52e5f5f2240d..8f63efb2a2cc 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -159,4 +159,6 @@ extern unsigned int ptrs_per_p4d; #define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) +#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? 
PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b6606fe6cfdf..2da1f95b88d7 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -194,7 +194,6 @@ enum page_cache_mode { #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) -#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) @@ -220,7 +219,6 @@ enum page_cache_mode { #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) -#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) @@ -284,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + #ifdef CONFIG_X86_PAE /* @@ -467,9 +471,6 @@ static inline pteval_t pte_flags(pte_t pte) return native_pte_val(pte) & PTE_FLAGS_MASK; } -extern uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM]; -extern uint8_t __pte2cachemode_tbl[8]; - #define __pte2cm_idx(cb) \ ((((cb) >> (_PAGE_BIT_PAT - 2)) & 4) | \ (((cb) >> (_PAGE_BIT_PCD - 1)) & 2) | \ @@ -479,43 +480,26 @@ extern uint8_t __pte2cachemode_tbl[8]; (((i) & 2) << (_PAGE_BIT_PCD - 1)) | \ (((i) & 1) << _PAGE_BIT_PWT)) -static inline unsigned long cachemode2protval(enum page_cache_mode pcm) -{ - if (likely(pcm == 0)) - return 0; - return __cachemode2pte_tbl[pcm]; -} -static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm) +unsigned long cachemode2protval(enum page_cache_mode pcm); + +static inline pgprotval_t protval_4k_2_large(pgprotval_t val) { - return __pgprot(cachemode2protval(pcm)); + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } -static inline enum page_cache_mode pgprot2cachemode(pgprot_t pgprot) +static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) { - unsigned long masked; - - masked = pgprot_val(pgprot) & _PAGE_CACHE_MASK; - if (likely(masked == 0)) - return 0; - return __pte2cachemode_tbl[__pte2cm_idx(masked)]; + return __pgprot(protval_4k_2_large(pgprot_val(pgprot))); } -static inline pgprot_t pgprot_4k_2_large(pgprot_t pgprot) +static inline pgprotval_t protval_large_2_4k(pgprotval_t val) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT) << (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | + ((val & _PAGE_PAT_LARGE) >> + (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); } static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot) { - pgprotval_t val = pgprot_val(pgprot); - pgprot_t new; - - pgprot_val(new) = (val & ~(_PAGE_PAT | _PAGE_PAT_LARGE)) | - ((val & _PAGE_PAT_LARGE) 
>> - (_PAGE_BIT_PAT_LARGE - _PAGE_BIT_PAT)); - return new; + return __pgprot(protval_large_2_4k(pgprot_val(pgprot))); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3bcf27caf6c9..42cd333616c4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -113,9 +113,10 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ @@ -727,7 +728,6 @@ static inline void sync_core(void) unsigned int tmp; asm volatile ( - UNWIND_HINT_SAVE "mov %%ss, %0\n\t" "pushq %q0\n\t" "pushq %%rsp\n\t" @@ -737,7 +737,6 @@ static inline void sync_core(void) "pushq %q0\n\t" "pushq $1f\n\t" "iretq\n\t" - UNWIND_HINT_RESTORE "1:" : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); #endif @@ -824,7 +823,7 @@ static inline void prefetch(const void *x) * Useful for spinlocks to avoid one state transition in the * cache coherency protocol: */ -static inline void prefetchw(const void *x) +static __always_inline void prefetchw(const void *x) { alternative_input(BASE_PREFETCH, "prefetchw %P1", X86_FEATURE_3DNOWPREFETCH, diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d6475fdd327..ebedeab48704 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) * On x86_64, vm86 mode is mercifully nonexistent, and we don't need * the extra check. */ -static inline int user_mode(struct pt_regs *regs) +static __always_inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h index f6b7fe2833cc..07603064df8f 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_RESCTRL_SCHED_H -#define _ASM_X86_RESCTRL_SCHED_H +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H #ifdef CONFIG_X86_CPU_RESCTRL @@ -84,10 +84,13 @@ static inline void resctrl_sched_in(void) __resctrl_sched_in(); } +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + #else static inline void resctrl_sched_in(void) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} #endif /* CONFIG_X86_CPU_RESCTRL */ -#endif /* _ASM_X86_RESCTRL_SCHED_H */ +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ec2c0a094b5d..5948218f35c5 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -86,28 +86,35 @@ int set_direct_map_default_noflush(struct page *page); extern int kernel_set_to_readonly; #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). 
+ */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..84b645cc8bc9 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -75,7 +75,17 @@ extern char _text[]; static inline bool kaslr_enabled(void) { - return !!(boot_params.hdr.loadflags & KASLR_FLAG); + return IS_ENABLED(CONFIG_RANDOMIZE_MEMORY) && + !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } static inline unsigned long kaslr_offset(void) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 27c47d183f4b..8b58d6975d5d 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, - X86_FEATURE_SMAP) + asm volatile ("# smap_save\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "pushf; pop %0; " __ASM_CLAC "\n\t" + "1:" : "=rm" (flags) : : "memory", "cc"); return flags; @@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { - asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "push %0; popf\n\t" + "1:" : : "g" (flags) : "memory", "cc"); } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 6d37b8fcfc77..eb8e781c4353 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -7,6 +7,7 @@ #include <asm/nops.h> #include <asm/processor-flags.h> +#include <linux/irqflags.h> #include <linux/jump_label.h> /* @@ -27,14 +28,14 @@ static inline unsigned long native_read_cr0(void) return val; } -static inline unsigned long native_read_cr2(void) +static __always_inline unsigned long native_read_cr2(void) { unsigned long val; asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); return val; } -static inline void native_write_cr2(unsigned long val) +static __always_inline void native_write_cr2(unsigned long val) { asm volatile("mov %0,%%cr2": : "r" 
(val), "m" (__force_order)); } @@ -129,7 +130,16 @@ static inline void native_wbinvd(void) asm volatile("wbinvd": : :"memory"); } -extern asmlinkage void native_load_gs_index(unsigned); +extern asmlinkage void asm_load_gs_index(unsigned int selector); + +static inline void native_load_gs_index(unsigned int selector) +{ + unsigned long flags; + + local_irq_save(flags); + asm_load_gs_index(selector); + local_irq_restore(flags); +} static inline unsigned long __read_cr4(void) { @@ -150,12 +160,12 @@ static inline void write_cr0(unsigned long x) native_write_cr0(x); } -static inline unsigned long read_cr2(void) +static __always_inline unsigned long read_cr2(void) { return native_read_cr2(); } -static inline void write_cr2(unsigned long x) +static __always_inline void write_cr2(unsigned long x) { native_write_cr2(x); } @@ -186,7 +196,7 @@ static inline void wbinvd(void) #ifdef CONFIG_X86_64 -static inline void load_gs_index(unsigned selector) +static inline void load_gs_index(unsigned int selector) { native_load_gs_index(selector); } diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index bf3e34b25afc..323db6c5852a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -3,29 +3,7 @@ #define _ASM_X86_SPINLOCK_TYPES_H #include <linux/types.h> - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define __TICKET_LOCK_INC 2 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)1) -#else -#define __TICKET_LOCK_INC 1 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)0) -#endif - -#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) -typedef u8 __ticket_t; -typedef u16 __ticketpair_t; -#else -typedef u16 __ticket_t; -typedef u32 __ticketpair_t; -#endif - -#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) - -#define TICKET_SHIFT (sizeof(__ticket_t) * 8) - #include <asm-generic/qspinlock_types.h> - #include <asm-generic/qrwlock_types.h> #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 14db05086bbf..5ae5a68e469d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,7 +87,7 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) } void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, char *log_lvl); + unsigned long *stack, const char *log_lvl); /* The form of the top of the frame on the stack */ struct stack_frame { diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 6ece8561ba66..8a1f5382a4ea 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -96,7 +96,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; }; @@ -203,8 +202,16 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 last_excp_to; }; + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256); +} + struct __attribute__ ((__packed__)) vmcb { struct vmcb_control_area control; + u8 reserved_control[1024 - sizeof(struct vmcb_control_area)]; struct vmcb_save_area save; }; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 0e059b73437b..9f69cc497f4b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,27 +12,6 @@ struct task_struct *__switch_to_asm(struct 
task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -/* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *next) -{ -#ifdef CONFIG_VMAP_STACK - /* - * If we switch to a stack that has a top-level paging entry - * that is not present in the current mm, the resulting #PF will - * will be promoted to a double-fault and we'll panic. Probe - * the new stack now so that vmalloc_fault can fix up the page - * tables if needed. This can only happen if we use a stack - * in vmap space. - * - * We assume that the stack is aligned so that it never spans - * more than one top-level paging entry. - * - * To minimize cache pollution, just follow the stack pointer. - */ - READ_ONCE(*(unsigned char *)next->thread.sp); -#endif -} - asmlinkage void ret_from_fork(void); /* @@ -67,8 +46,6 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(next); \ - \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 67315fa3956a..6593b42cb379 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -64,7 +64,7 @@ extern void text_poke_finish(void); #define DISP32_SIZE 4 -static inline int text_opcode_size(u8 opcode) +static __always_inline int text_opcode_size(u8 opcode) { int size = 0; @@ -118,12 +118,14 @@ extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; #ifndef CONFIG_UML_X86 -static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) +static __always_inline +void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; } -static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) +static __always_inline +void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* * The int3 handler in entry_64.S adds a gap between the @@ -138,7 +140,8 @@ static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) *(unsigned long *)regs->sp = val; } -static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) +static __always_inline +void int3_emulate_call(struct pt_regs *regs, unsigned long func) { int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f66d841262d..8c87a2e0b660 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -13,140 +13,51 @@ #include <asm/pti.h> #include <asm/processor-flags.h> -/* - * The x86 feature is called PCID (Process Context IDentifier). It is similar - * to what is traditionally called ASID on the RISC processors. - * - * We don't use the traditional ASID implementation, where each process/mm gets - * its own ASID and flush/restart when we run out of ASID space. - * - * Instead we have a small per-cpu array of ASIDs and cache the last few mm's - * that came by on this CPU, allowing cheaper switch_mm between processes on - * this CPU. - * - * We end up with different spaces for different things. To avoid confusion we - * use different names for each of them: - * - * ASID - [0, TLB_NR_DYN_ASIDS-1] - * the canonical identifier for an mm - * - * kPCID - [1, TLB_NR_DYN_ASIDS] - * the value we write into the PCID part of CR3; corresponds to the - * ASID+1, because PCID 0 is special. 
- * - * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] - * for KPTI each mm has two address spaces and thus needs two - * PCID values, but we can still do with a single ASID denomination - * for each mm. Corresponds to kPCID + 2048. - * - */ - -/* There are 12 bits of space for ASIDS in CR3 */ -#define CR3_HW_ASID_BITS 12 - -/* - * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for - * user/kernel switches - */ -#ifdef CONFIG_PAGE_TABLE_ISOLATION -# define PTI_CONSUMED_PCID_BITS 1 -#else -# define PTI_CONSUMED_PCID_BITS 0 -#endif +void __flush_tlb_all(void); -#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) +#define TLB_FLUSH_ALL -1UL -/* - * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account - * for them being zero-based. Another -1 is because PCID 0 is reserved for - * use by non-PCID-aware users. - */ -#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) +void cr4_update_irqsoff(unsigned long set, unsigned long clear); +unsigned long cr4_read_shadow(void); -/* - * 6 because 6 should be plenty and struct tlb_state will fit in two cache - * lines. - */ -#define TLB_NR_DYN_ASIDS 6 - -/* - * Given @asid, compute kPCID - */ -static inline u16 kern_pcid(u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits_irqsoff(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - -#ifdef CONFIG_PAGE_TABLE_ISOLATION - /* - * Make sure that the dynamic ASID space does not confict with the - * bit we are using to switch between user and kernel ASIDs. - */ - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); - - /* - * The ASID being passed in here should have respected the - * MAX_ASID_AVAILABLE and thus never have the switch bit set. - */ - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); -#endif - /* - * The dynamically-assigned ASIDs that get passed in are small - * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set, - * so do not bother to clear it. - * - * If PCID is on, ASID-aware code paths put the ASID+1 into the - * PCID bits. This serves two purposes. It prevents a nasty - * situation in which PCID-unaware code saves CR3, loads some other - * value (with PCID == 0), and then restores CR3, thus corrupting - * the TLB for ASID 0 if the saved ASID was nonzero. It also means - * that any bugs involving loading a PCID-enabled CR3 with - * CR4.PCIDE off will trigger deterministically. - */ - return asid + 1; + cr4_update_irqsoff(mask, 0); } -/* - * Given @asid, compute uPCID - */ -static inline u16 user_pcid(u16 asid) +/* Clear in this cpu's CR4. */ +static inline void cr4_clear_bits_irqsoff(unsigned long mask) { - u16 ret = kern_pcid(asid); -#ifdef CONFIG_PAGE_TABLE_ISOLATION - ret |= 1 << X86_CR3_PTI_PCID_USER_BIT; -#endif - return ret; + cr4_update_irqsoff(0, mask); } -struct pgd_t; -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) +/* Set in this cpu's CR4. */ +static inline void cr4_set_bits(unsigned long mask) { - if (static_cpu_has(X86_FEATURE_PCID)) { - return __sme_pa(pgd) | kern_pcid(asid); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(pgd); - } + unsigned long flags; + + local_irq_save(flags); + cr4_set_bits_irqsoff(mask); + local_irq_restore(flags); } -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) +/* Clear in this cpu's CR4. 
*/ +static inline void cr4_clear_bits(unsigned long mask) { - VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - /* - * Use boot_cpu_has() instead of this_cpu_has() as this function - * might be called during early boot. This should work even after - * boot because all CPU's the have same capabilities: - */ - VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); - return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; + unsigned long flags; + + local_irq_save(flags); + cr4_clear_bits_irqsoff(mask); + local_irq_restore(flags); } -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr) -#endif +#ifndef MODULE +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 struct tlb_context { u64 ctx_id; @@ -242,38 +153,7 @@ struct tlb_state { }; DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); -/* - * Blindly accessing user memory from NMI context can be dangerous - * if we're in the middle of switching the current user task or - * switching the loaded mm. It can also be dangerous if we - * interrupted some kernel code that was temporarily using a - * different mm. - */ -static inline bool nmi_uaccess_okay(void) -{ - struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); - struct mm_struct *current_mm = current->mm; - - VM_WARN_ON_ONCE(!loaded_mm); - - /* - * The condition we want to check is - * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, - * if we're running in a VM with shadow paging, and nmi_uaccess_okay() - * is supposed to be reasonably fast. - * - * Instead, we check the almost equivalent but somewhat conservative - * condition below, and we rely on the fact that switch_mm_irqs_off() - * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3. - */ - if (loaded_mm != current_mm) - return false; - - VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); - - return true; -} - +bool nmi_uaccess_okay(void); #define nmi_uaccess_okay nmi_uaccess_okay /* Initialize cr4 shadow for this CPU. */ @@ -282,250 +162,12 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } -static inline void __cr4_set(unsigned long cr4) -{ - lockdep_assert_irqs_disabled(); - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); -} - -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) - __cr4_set(cr4 | mask); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) - __cr4_set(cr4 & ~mask); -} - -/* Set in this cpu's CR4. */ -static inline void cr4_set_bits(unsigned long mask) -{ - unsigned long flags; - - local_irq_save(flags); - cr4_set_bits_irqsoff(mask); - local_irq_restore(flags); -} - -/* Clear in this cpu's CR4. */ -static inline void cr4_clear_bits(unsigned long mask) -{ - unsigned long flags; - - local_irq_save(flags); - cr4_clear_bits_irqsoff(mask); - local_irq_restore(flags); -} - -static inline void cr4_toggle_bits_irqsoff(unsigned long mask) -{ - unsigned long cr4; - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - __cr4_set(cr4 ^ mask); -} - -/* Read the CR4 shadow. 
*/ -static inline unsigned long cr4_read_shadow(void) -{ - return this_cpu_read(cpu_tlbstate.cr4); -} - -/* - * Mark all other ASIDs as invalid, preserves the current. - */ -static inline void invalidate_other_asid(void) -{ - this_cpu_write(cpu_tlbstate.invalidate_other, true); -} - -/* - * Save some of cr4 feature set we're using (e.g. Pentium 4MB - * enable and PPro Global page enable), so that any CPU's that boot - * up after us can get the correct flags. This should only be used - * during boot on the boot cpu. - */ extern unsigned long mmu_cr4_features; extern u32 *trampoline_cr4_features; -static inline void cr4_set_bits_and_update_boot(unsigned long mask) -{ - mmu_cr4_features |= mask; - if (trampoline_cr4_features) - *trampoline_cr4_features = mmu_cr4_features; - cr4_set_bits(mask); -} - extern void initialize_tlbstate_and_flush(void); /* - * Given an ASID, flush the corresponding user ASID. We can delay this - * until the next time we switch to it. - * - * See SWITCH_TO_USER_CR3. - */ -static inline void invalidate_user_asid(u16 asid) -{ - /* There is no user ASID if address space separation is off */ - if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) - return; - - /* - * We only have a single ASID if PCID is off and the CR3 - * write will have flushed it. - */ - if (!cpu_feature_enabled(X86_FEATURE_PCID)) - return; - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - __set_bit(kern_pcid(asid), - (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); -} - -/* - * flush the entire current user mapping - */ -static inline void __native_flush_tlb(void) -{ - /* - * Preemption or interrupts must be disabled to protect the access - * to the per CPU variable and to prevent being preempted between - * read_cr3() and write_cr3(). - */ - WARN_ON_ONCE(preemptible()); - - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); - - /* If current->mm == NULL then the read_cr3() "borrows" an mm */ - native_write_cr3(__native_read_cr3()); -} - -/* - * flush everything - */ -static inline void __native_flush_tlb_global(void) -{ - unsigned long cr4, flags; - - if (static_cpu_has(X86_FEATURE_INVPCID)) { - /* - * Using INVPCID is considerably faster than a pair of writes - * to CR4 sandwiched inside an IRQ flag save/restore. - * - * Note, this works with CR4.PCIDE=0 or 1. - */ - invpcid_flush_all(); - return; - } - - /* - * Read-modify-write to CR4 - protect it from preemption and - * from interrupts. (Use the raw variant because this code can - * be called from deep inside debugging code.) - */ - raw_local_irq_save(flags); - - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* toggle PGE */ - native_write_cr4(cr4 ^ X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); - - raw_local_irq_restore(flags); -} - -/* - * flush one page in the user mapping - */ -static inline void __native_flush_tlb_one_user(unsigned long addr) -{ - u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); - - asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. - * Just use invalidate_user_asid() in case we are called early. 
- */ - if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) - invalidate_user_asid(loaded_mm_asid); - else - invpcid_flush_one(user_pcid(loaded_mm_asid), addr); -} - -/* - * flush everything - */ -static inline void __flush_tlb_all(void) -{ - /* - * This is to catch users with enabled preemption and the PGE feature - * and don't trigger the warning in __native_flush_tlb(). - */ - VM_WARN_ON_ONCE(preemptible()); - - if (boot_cpu_has(X86_FEATURE_PGE)) { - __flush_tlb_global(); - } else { - /* - * !PGE -> !PCID (setup_pcid()), thus every flush is total. - */ - __flush_tlb(); - } -} - -/* - * flush one page in the kernel mapping - */ -static inline void __flush_tlb_one_kernel(unsigned long addr) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); - - /* - * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its - * paravirt equivalent. Even with PCID, this is sufficient: we only - * use PCID if we also use global PTEs for the kernel mapping, and - * INVLPG flushes global translations across all address spaces. - * - * If PTI is on, then the kernel is mapped with non-global PTEs, and - * __flush_tlb_one_user() will flush the given address for the current - * kernel address space and for its usermode counterpart, but it does - * not flush it for other address spaces. - */ - __flush_tlb_one_user(addr); - - if (!static_cpu_has(X86_FEATURE_PTI)) - return; - - /* - * See above. We need to propagate the flush to all other address - * spaces. In principle, we only need to propagate it to kernelmode - * address spaces, but the extra bookkeeping we would need is not - * worth it. - */ - invalidate_other_asid(); -} - -#define TLB_FLUSH_ALL -1UL - -/* * TLB flushing: * * - flush_tlb_all() flushes all processes TLBs @@ -563,7 +205,15 @@ struct flush_tlb_info { bool freed_tables; }; -#define local_flush_tlb() __flush_tlb() +void flush_tlb_local(void); +void flush_tlb_one_user(unsigned long addr); +void flush_tlb_one_kernel(unsigned long addr); +void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info); + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif #define flush_tlb_mm(mm) \ flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true) @@ -585,9 +235,6 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); } -void native_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info); - static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { /* @@ -608,12 +255,6 @@ static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); -#ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, info) \ - native_flush_tlb_others(mask, info) - -#define paravirt_tlb_remove_table(tlb, page) \ - tlb_remove_page(tlb, (void *)(page)) -#endif +#endif /* !MODULE */ #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/include/asm/trace/common.h b/arch/x86/include/asm/trace/common.h index 57c8da027d99..f0f9bcdb74d9 100644 --- a/arch/x86/include/asm/trace/common.h +++ b/arch/x86/include/asm/trace/common.h @@ -5,12 +5,8 @@ DECLARE_STATIC_KEY_FALSE(trace_pagefault_key); #define trace_pagefault_enabled() \ static_branch_unlikely(&trace_pagefault_key) -DECLARE_STATIC_KEY_FALSE(trace_resched_ipi_key); -#define trace_resched_ipi_enabled() \ - static_branch_unlikely(&trace_resched_ipi_key) #else static inline bool trace_pagefault_enabled(void) { return false; } -static inline bool 
trace_resched_ipi_enabled(void) { return false; } #endif #endif diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 33b9d0f0aafe..88e7f0f3bf62 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -10,9 +10,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern int trace_resched_ipi_reg(void); -extern void trace_resched_ipi_unreg(void); - DECLARE_EVENT_CLASS(x86_irq_vector, TP_PROTO(int vector), @@ -37,18 +34,6 @@ DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ TP_PROTO(int vector), \ TP_ARGS(vector), NULL, NULL); -#define DEFINE_RESCHED_IPI_EVENT(name) \ -DEFINE_EVENT_FN(x86_irq_vector, name##_entry, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); \ -DEFINE_EVENT_FN(x86_irq_vector, name##_exit, \ - TP_PROTO(int vector), \ - TP_ARGS(vector), \ - trace_resched_ipi_reg, \ - trace_resched_ipi_unreg); - /* * local_timer - called when entering/exiting a local timer interrupt * vector handler @@ -99,7 +84,7 @@ TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); /* * reschedule - called when entering/exiting a reschedule vector handler */ -DEFINE_RESCHED_IPI_EVENT(reschedule); +DEFINE_IRQ_VECTOR_EVENT(reschedule); /* * call_function - called when entering/exiting a call function interrupt diff --git a/arch/x86/include/asm/trapnr.h b/arch/x86/include/asm/trapnr.h new file mode 100644 index 000000000000..082f45631fa9 --- /dev/null +++ b/arch/x86/include/asm/trapnr.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_TRAPNR_H +#define _ASM_X86_TRAPNR_H + +/* Interrupts/Exceptions */ + +#define X86_TRAP_DE 0 /* Divide-by-zero */ +#define X86_TRAP_DB 1 /* Debug */ +#define X86_TRAP_NMI 2 /* Non-maskable Interrupt */ +#define X86_TRAP_BP 3 /* Breakpoint */ +#define X86_TRAP_OF 4 /* Overflow */ +#define X86_TRAP_BR 5 /* Bound Range Exceeded */ +#define X86_TRAP_UD 6 /* Invalid Opcode */ +#define X86_TRAP_NM 7 /* Device Not Available */ +#define X86_TRAP_DF 8 /* Double Fault */ +#define X86_TRAP_OLD_MF 9 /* Coprocessor Segment Overrun */ +#define X86_TRAP_TS 10 /* Invalid TSS */ +#define X86_TRAP_NP 11 /* Segment Not Present */ +#define X86_TRAP_SS 12 /* Stack Segment Fault */ +#define X86_TRAP_GP 13 /* General Protection Fault */ +#define X86_TRAP_PF 14 /* Page Fault */ +#define X86_TRAP_SPURIOUS 15 /* Spurious Interrupt */ +#define X86_TRAP_MF 16 /* x87 Floating-Point Exception */ +#define X86_TRAP_AC 17 /* Alignment Check */ +#define X86_TRAP_MC 18 /* Machine Check */ +#define X86_TRAP_XF 19 /* SIMD Floating-Point Exception */ +#define X86_TRAP_VE 20 /* Virtualization Exception */ +#define X86_TRAP_CP 21 /* Control Protection Exception */ +#define X86_TRAP_IRET 32 /* IRET Exception */ + +#endif diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c26a7e1d8a2c..714b1a30e7b0 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -6,87 +6,9 @@ #include <linux/kprobes.h> #include <asm/debugreg.h> +#include <asm/idtentry.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... 
*/ -#define dotraplinkage __visible - -asmlinkage void divide_error(void); -asmlinkage void debug(void); -asmlinkage void nmi(void); -asmlinkage void int3(void); -asmlinkage void overflow(void); -asmlinkage void bounds(void); -asmlinkage void invalid_op(void); -asmlinkage void device_not_available(void); -#ifdef CONFIG_X86_64 -asmlinkage void double_fault(void); -#endif -asmlinkage void coprocessor_segment_overrun(void); -asmlinkage void invalid_TSS(void); -asmlinkage void segment_not_present(void); -asmlinkage void stack_segment(void); -asmlinkage void general_protection(void); -asmlinkage void page_fault(void); -asmlinkage void async_page_fault(void); -asmlinkage void spurious_interrupt_bug(void); -asmlinkage void coprocessor_error(void); -asmlinkage void alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void simd_coprocessor_error(void); - -#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) -asmlinkage void xen_divide_error(void); -asmlinkage void xen_xennmi(void); -asmlinkage void xen_xendebug(void); -asmlinkage void xen_int3(void); -asmlinkage void xen_overflow(void); -asmlinkage void xen_bounds(void); -asmlinkage void xen_invalid_op(void); -asmlinkage void xen_device_not_available(void); -asmlinkage void xen_double_fault(void); -asmlinkage void xen_coprocessor_segment_overrun(void); -asmlinkage void xen_invalid_TSS(void); -asmlinkage void xen_segment_not_present(void); -asmlinkage void xen_stack_segment(void); -asmlinkage void xen_general_protection(void); -asmlinkage void xen_page_fault(void); -asmlinkage void xen_spurious_interrupt_bug(void); -asmlinkage void xen_coprocessor_error(void); -asmlinkage void xen_alignment_check(void); -#ifdef CONFIG_X86_MCE -asmlinkage void xen_machine_check(void); -#endif /* CONFIG_X86_MCE */ -asmlinkage void xen_simd_coprocessor_error(void); -#endif - -dotraplinkage void do_divide_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_debug(struct pt_regs *regs, long error_code); -dotraplinkage void do_nmi(struct pt_regs *regs, long error_code); -dotraplinkage void do_int3(struct pt_regs *regs, long error_code); -dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); -dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); -dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) -dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -#endif -dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); -dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); -dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code); -dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code); -dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); -dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); -dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code); -dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code); -dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); -#ifdef CONFIG_X86_32 -dotraplinkage void do_iret_error(struct 
pt_regs *regs, long error_code); -#endif -dotraplinkage void do_mce(struct pt_regs *regs, long error_code); - #ifdef CONFIG_X86_64 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs); asmlinkage __visible notrace @@ -94,6 +16,11 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s); void __init trap_init(void); #endif +#ifdef CONFIG_X86_F00F_BUG +/* For handling the FOOF bug */ +void handle_invalid_op(struct pt_regs *regs); +#endif + static inline int get_si_code(unsigned long condition) { if (condition & DR_STEP) @@ -107,21 +34,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; void math_emulate(struct math_emu_info *); -#ifndef CONFIG_X86_32 -asmlinkage void smp_thermal_interrupt(struct pt_regs *regs); -asmlinkage void smp_threshold_interrupt(struct pt_regs *regs); -asmlinkage void smp_deferred_error_interrupt(struct pt_regs *regs); -#endif - -void smp_apic_timer_interrupt(struct pt_regs *regs); -void smp_spurious_interrupt(struct pt_regs *regs); -void smp_error_interrupt(struct pt_regs *regs); -asmlinkage void smp_irq_move_cleanup_interrupt(void); - -extern void ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs); -extern void ist_begin_non_atomic(struct pt_regs *regs); -extern void ist_end_non_atomic(void); #ifdef CONFIG_VMAP_STACK void __noreturn handle_stack_overflow(const char *message, @@ -129,31 +41,6 @@ void __noreturn handle_stack_overflow(const char *message, unsigned long fault_address); #endif -/* Interrupts/Exceptions */ -enum { - X86_TRAP_DE = 0, /* 0, Divide-by-zero */ - X86_TRAP_DB, /* 1, Debug */ - X86_TRAP_NMI, /* 2, Non-maskable Interrupt */ - X86_TRAP_BP, /* 3, Breakpoint */ - X86_TRAP_OF, /* 4, Overflow */ - X86_TRAP_BR, /* 5, Bound Range Exceeded */ - X86_TRAP_UD, /* 6, Invalid Opcode */ - X86_TRAP_NM, /* 7, Device Not Available */ - X86_TRAP_DF, /* 8, Double Fault */ - X86_TRAP_OLD_MF, /* 9, Coprocessor Segment Overrun */ - X86_TRAP_TS, /* 10, Invalid TSS */ - X86_TRAP_NP, /* 11, Segment Not Present */ - X86_TRAP_SS, /* 12, Stack Segment Fault */ - X86_TRAP_GP, /* 13, General Protection Fault */ - X86_TRAP_PF, /* 14, Page Fault */ - X86_TRAP_SPURIOUS, /* 15, Spurious Interrupt */ - X86_TRAP_MF, /* 16, x87 Floating-Point Exception */ - X86_TRAP_AC, /* 17, Alignment Check */ - X86_TRAP_MC, /* 18, Machine Check */ - X86_TRAP_XF, /* 19, SIMD Floating-Point Exception */ - X86_TRAP_IRET = 32, /* 32, IRET Exception */ -}; - /* * Page fault error code bits: * diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d8f283b9a569..18dfa07d3ef0 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -504,12 +504,12 @@ do { \ * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. 
*/ -#define unsafe_copy_loop(dst, src, len, type, label) \ - while (len >= sizeof(type)) { \ - unsafe_put_user(*(type *)src,(type __user *)dst,label); \ - dst += sizeof(type); \ - src += sizeof(type); \ - len -= sizeof(type); \ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ } #define unsafe_copy_to_user(_dst,_src,_len,label) \ @@ -523,5 +523,21 @@ do { \ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ } while (0) +#define HAVE_GET_KERNEL_NOFAULT + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __kr_err; \ + \ + __get_user_size(*((type *)(dst)), (__force type __user *)(src), \ + sizeof(type), __kr_err); \ + if (unlikely(__kr_err)) \ + goto err_label; \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, err_label) \ + __put_user_size(*((type *)(src)), (__force type __user *)(dst), \ + sizeof(type), err_label) + #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index f5e2eb12cb71..7d903fdb3f43 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -86,32 +86,15 @@ UNWIND_HINT sp_offset=\sp_offset .endm -.macro UNWIND_HINT_SAVE - UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE -.endm - -.macro UNWIND_HINT_RESTORE - UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +/* + * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN + * and sibling calls. On these, sp_offset denotes the expected offset from + * initial_func_cfi. + */ +.macro UNWIND_HINT_RET_OFFSET sp_offset=8 + UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset .endm -#else /* !__ASSEMBLY__ */ - -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" - -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) - -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 389174eaec79..2fcc3ac12e76 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -123,12 +123,6 @@ enum uv_memprotect { UV_MEMPROT_ALLOW_RW }; -/* - * bios calls have 6 parameters - */ -extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); - extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, @@ -146,7 +140,6 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; -#define uv_partition_coherence_id() (sn_coherency_id) extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 45ea95ce79b4..3db85626048f 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -8,6 +8,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, 
UV_X2APIC, UV_NON_UNIQUE_APIC}; struct cpumask; struct mm_struct; +struct flush_tlb_info; #ifdef CONFIG_X86_UV #include <linux/efi.h> @@ -31,7 +32,6 @@ static inline bool is_early_uv_system(void) } extern int is_uv_system(void); extern int is_uv_hubbed(int uvtype); -extern int is_uv_hubless(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); @@ -44,7 +44,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } -static inline int is_uv_hubless(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 13687bf0e0a9..f1188bd47658 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -12,6 +12,8 @@ #define _ASM_X86_UV_UV_BAU_H #include <linux/bitmap.h> +#include <asm/idtentry.h> + #define BITSPERBYTE 8 /* @@ -799,12 +801,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) bitmap_zero(&dstp->bits, nbits); } -extern void uv_bau_message_intr1(void); -#ifdef CONFIG_TRACING -#define trace_uv_bau_message_intr1 uv_bau_message_intr1 -#endif -extern void uv_bau_timeout_intr1(void); - struct atomic_short { short counter; }; diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 950cd1395d5d..60ca0afdeaf9 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -219,20 +219,6 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; } -#define UV_HUB_INFO_VERSION 0x7150 -extern int uv_hub_info_version(void); -static inline int uv_hub_info_check(int version) -{ - if (uv_hub_info_version() == version) - return 0; - - pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n", - uv_hub_info_version(), version); - - BUG(); /* Catastrophic - cannot continue on unknown UV system */ -} -#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION) - /* * HUB revision ranges for each UV HUB architecture. 
* This is a software convention - NOT the hardware revision numbers in @@ -244,51 +230,32 @@ static inline int uv_hub_info_check(int version) #define UV4_HUB_REVISION_BASE 7 #define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ -/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */ static inline int is_uv1_hub(void) { -#ifdef UV1_HUB_IS_SUPPORTED return is_uv_hubbed(uv(1)); -#else - return 0; -#endif } static inline int is_uv2_hub(void) { -#ifdef UV2_HUB_IS_SUPPORTED return is_uv_hubbed(uv(2)); -#else - return 0; -#endif } static inline int is_uv3_hub(void) { -#ifdef UV3_HUB_IS_SUPPORTED return is_uv_hubbed(uv(3)); -#else - return 0; -#endif } /* First test "is UV4A", then "is UV4" */ static inline int is_uv4a_hub(void) { -#ifdef UV4A_HUB_IS_SUPPORTED if (is_uv_hubbed(uv(4))) return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE); -#endif return 0; } static inline int is_uv4_hub(void) { -#ifdef UV4_HUB_IS_SUPPORTED return is_uv_hubbed(uv(4)); -#else - return 0; -#endif } static inline int is_uvx_hub(void) @@ -692,7 +659,6 @@ static inline int uv_cpu_blade_processor_id(int cpu) { return uv_cpu_info_per(cpu)->blade_cpu_id; } -#define _uv_cpu_blade_processor_id 1 /* indicate function available */ /* Blade number to Node number (UV1..UV4 is 1:1) */ static inline int uv_blade_to_node(int blade) @@ -856,26 +822,6 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } extern unsigned int uv_apicid_hibits; -static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) -{ - apicid |= uv_apicid_hibits; - return (1UL << UVH_IPI_INT_SEND_SHFT) | - ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | - (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); -} - -static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) -{ - unsigned long val; - unsigned long dmode = dest_Fixed; - - if (vector == NMI_VECTOR) - dmode = dest_NMI; - - val = uv_hub_ipi_value(apicid, vector, dmode); - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); -} /* * Get the minimum revision number of the hub chips within the partition. diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 62c79e26a59a..9ee5ed6e8b34 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -99,13 +99,6 @@ #define UV3_HUB_PART_NUMBER_X 0x4321 #define UV4_HUB_PART_NUMBER 0x99a1 -/* Compat: Indicate which UV Hubs are supported. */ -#define UV1_HUB_IS_SUPPORTED 1 -#define UV2_HUB_IS_SUPPORTED 1 -#define UV3_HUB_IS_SUPPORTED 1 -#define UV4_HUB_IS_SUPPORTED 1 -#define UV4A_HUB_IS_SUPPORTED 1 - /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 9a6dc9b4ec99..fb81fea99093 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. 
+ */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + /* * x86 specific delta calculation. * diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5e090d1f03f8..cd7de4b401fe 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -527,10 +527,12 @@ struct vmx_msr_entry { /* * Exit Qualifications for entry failure during or after loading guest state */ -#define ENTRY_FAIL_DEFAULT 0 -#define ENTRY_FAIL_PDPTE 2 -#define ENTRY_FAIL_NMI 3 -#define ENTRY_FAIL_VMCS_LINK_PTR 4 +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; /* * Exit Qualifications for EPT Violations diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 96d9cd208610..6807153c0410 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -50,14 +50,12 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. * @intr_init: interrupt init code - * @trap_init: platform specific trap setup * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); - void (*trap_init)(void); void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index d50c7b747d8b..ba4c1b15908b 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -38,11 +38,11 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/pgtable.h> #include <trace/events/xen.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/smap.h> #include <asm/nospec-branch.h> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 790ce08e41f2..5941e18edd5a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -11,7 +11,6 @@ #include <asm/extable.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <xen/interface/xen.h> #include <xen/interface/grant_table.h> |
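For orientation, here is a minimal, hypothetical sketch of how a caller might use the __get_kernel_nofault() helper added to <asm/uaccess.h> in the uaccess hunk above. It is not part of the patch: the function name read_kernel_long_nofault() and the -EFAULT return convention are illustrative assumptions; only the macro's (dst, src, type, err_label) signature and its branch-to-label-on-fault behaviour come from the diff itself.

#include <linux/errno.h>
#include <linux/uaccess.h>

/*
 * Illustrative only -- not part of the patch above.  Read one long from
 * a kernel address without faulting: on an exception the macro jumps to
 * the supplied error label instead of oopsing.
 */
static long read_kernel_long_nofault(const unsigned long *src,
				     unsigned long *dst)
{
	__get_kernel_nofault(dst, src, unsigned long, Efault);
	return 0;
Efault:
	return -EFAULT;
}

In-tree, this arch macro is normally consumed by the generic probe-kernel code (selected via HAVE_GET_KERNEL_NOFAULT) rather than called directly; the direct call here is only to show the interface.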