diff options
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/cmpxchg.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/elf.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/fsgsbase.h | 49 | ||||
-rw-r--r-- | arch/x86/include/asm/segment.h | 46 | ||||
-rw-r--r-- | arch/x86/include/asm/string_64.h | 20 | ||||
-rw-r--r-- | arch/x86/include/asm/vgtod.h | 26 |
6 files changed, 120 insertions, 37 deletions
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index a55d79b233d3..bfb85e5844ab 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -242,10 +242,12 @@ extern void __add_wrong_size(void) BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long))); \ VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2)); \ - asm volatile(pfx "cmpxchg%c4b %2; sete %0" \ - : "=a" (__ret), "+d" (__old2), \ - "+m" (*(p1)), "+m" (*(p2)) \ - : "i" (2 * sizeof(long)), "a" (__old1), \ + asm volatile(pfx "cmpxchg%c5b %1" \ + CC_SET(e) \ + : CC_OUT(e) (__ret), \ + "+m" (*(p1)), "+m" (*(p2)), \ + "+a" (__old1), "+d" (__old2) \ + : "i" (2 * sizeof(long)), \ "b" (__new1), "c" (__new2)); \ __ret; \ }) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index a357031d85b5..69c0f892e310 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -10,6 +10,7 @@ #include <asm/ptrace.h> #include <asm/user.h> #include <asm/auxvec.h> +#include <asm/fsgsbase.h> typedef unsigned long elf_greg_t; @@ -204,7 +205,6 @@ void set_personality_ia32(bool); #define ELF_CORE_COPY_REGS(pr_reg, regs) \ do { \ - unsigned long base; \ unsigned v; \ (pr_reg)[0] = (regs)->r15; \ (pr_reg)[1] = (regs)->r14; \ @@ -227,8 +227,8 @@ do { \ (pr_reg)[18] = (regs)->flags; \ (pr_reg)[19] = (regs)->sp; \ (pr_reg)[20] = (regs)->ss; \ - rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \ - rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \ + (pr_reg)[21] = x86_fsbase_read_cpu(); \ + (pr_reg)[22] = x86_gsbase_read_cpu_inactive(); \ asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \ asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \ asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \ diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h new file mode 100644 index 000000000000..eb377b6e9eed --- /dev/null +++ b/arch/x86/include/asm/fsgsbase.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FSGSBASE_H +#define _ASM_FSGSBASE_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 + +#include <asm/msr-index.h> + +/* + * Read/write a task's FSBASE or GSBASE. This returns the value that + * the FS/GS base would have (if the task were to be resumed). These + * work on the current task or on a non-running (typically stopped + * ptrace child) task. + */ +extern unsigned long x86_fsbase_read_task(struct task_struct *task); +extern unsigned long x86_gsbase_read_task(struct task_struct *task); +extern int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase); +extern int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase); + +/* Helper functions for reading/writing FS/GS base */ + +static inline unsigned long x86_fsbase_read_cpu(void) +{ + unsigned long fsbase; + + rdmsrl(MSR_FS_BASE, fsbase); + + return fsbase; +} + +static inline unsigned long x86_gsbase_read_cpu_inactive(void) +{ + unsigned long gsbase; + + rdmsrl(MSR_KERNEL_GS_BASE, gsbase); + + return gsbase; +} + +extern void x86_fsbase_write_cpu(unsigned long fsbase); +extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase); + +#endif /* CONFIG_X86_64 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_FSGSBASE_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index e293c122d0d5..a314087add07 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -186,8 +186,7 @@ #define GDT_ENTRY_TLS_MIN 12 #define GDT_ENTRY_TLS_MAX 14 -/* Abused to load per CPU data from limit */ -#define GDT_ENTRY_PER_CPU 15 +#define GDT_ENTRY_CPUNODE 15 /* * Number of entries in the GDT table: @@ -207,7 +206,7 @@ #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) #define __USER32_DS __USER_DS #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) -#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) #endif @@ -225,6 +224,47 @@ #define GDT_ENTRY_TLS_ENTRIES 3 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) +#ifdef CONFIG_X86_64 + +/* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ +#define VDSO_CPUNODE_BITS 12 +#define VDSO_CPUNODE_MASK 0xfff + +#ifndef __ASSEMBLY__ + +/* Helper functions to store/load CPU and node numbers */ + +static inline unsigned long vdso_encode_cpunode(int cpu, unsigned long node) +{ + return (node << VDSO_CPUNODE_BITS) | cpu; +} + +static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) +{ + unsigned int p; + + /* + * Load CPU and node number from the GDT. LSL is faster than RDTSCP + * and works on all CPUs. This is volatile so that it orders + * correctly with respect to barrier() and to keep GCC from cleverly + * hoisting it out of the calling function. + * + * If RDPID is available, use it. + */ + alternative_io ("lsl %[seg],%[p]", + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ + X86_FEATURE_RDPID, + [p] "=a" (p), [seg] "r" (__CPUNODE_SEG)); + + if (cpu) + *cpu = (p & VDSO_CPUNODE_MASK); + if (node) + *node = (p >> VDSO_CPUNODE_BITS); +} + +#endif /* !__ASSEMBLY__ */ +#endif /* CONFIG_X86_64 */ + #ifdef __KERNEL__ /* diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index d33f92b9fa22..7ad41bfcc16c 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -149,7 +149,25 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 -void memcpy_flushcache(void *dst, const void *src, size_t cnt); +void __memcpy_flushcache(void *dst, const void *src, size_t cnt); +static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + if (__builtin_constant_p(cnt)) { + switch (cnt) { + case 4: + asm ("movntil %1, %0" : "=m"(*(u32 *)dst) : "r"(*(u32 *)src)); + return; + case 8: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + return; + case 16: + asm ("movntiq %1, %0" : "=m"(*(u64 *)dst) : "r"(*(u64 *)src)); + asm ("movntiq %1, %0" : "=m"(*(u64 *)(dst + 8)) : "r"(*(u64 *)(src + 8))); + return; + } + } + __memcpy_flushcache(dst, src, cnt); +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 53748541c487..056a61c8c5c7 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -77,30 +77,4 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s) ++s->seq; } -#ifdef CONFIG_X86_64 - -#define VGETCPU_CPU_MASK 0xfff - -static inline unsigned int __getcpu(void) -{ - unsigned int p; - - /* - * Load per CPU data from GDT. LSL is faster than RDTSCP and - * works on all CPUs. This is volatile so that it orders - * correctly wrt barrier() and to keep gcc from cleverly - * hoisting it out of the calling function. - * - * If RDPID is available, use it. - */ - alternative_io ("lsl %[seg],%[p]", - ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */ - X86_FEATURE_RDPID, - [p] "=a" (p), [seg] "r" (__PER_CPU_SEG)); - - return p; -} - -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_VGTOD_H */ |