author     Linus Torvalds <torvalds@linux-foundation.org>   2023-10-31 03:18:00 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-10-31 03:18:00 +0300
commit     5780e39edbb49bb441f1c8eb9e6cb8be92dee31d (patch)
tree       0b0aa6e1896db838785fce2ed078e3b7e07ab98d /arch/x86/include/asm
parent     2b95bb052656d46c2073b87f9487a53ef5e79732 (diff)
parent     8ae292c66dcb160b3e1e16b66c3076d5a2c63873 (diff)
download   linux-5780e39edbb49bb441f1c8eb9e6cb8be92dee31d.tar.xz
Merge tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 assembly code updates from Ingo Molnar:
- Micro-optimize the x86 bitops code
- Define target-specific {raw,this}_cpu_try_cmpxchg{64,128}() to
improve code generation
- Define and use raw_cpu_try_cmpxchg() in preempt_count_set() (see the
  usage sketch after the commit list below)
- Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
- Remove the unused __sw_hweight64() implementation on x86-32
- Misc fixes and cleanups
* tag 'x86-asm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/lib: Address kernel-doc warnings
x86/entry: Fix typos in comments
x86/entry: Remove unused argument %rsi passed to exc_nmi()
x86/bitops: Remove unused __sw_hweight64() assembly implementation on x86-32
x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
x86/percpu: Use raw_cpu_try_cmpxchg() in preempt_count_set()
x86/percpu: Define raw_cpu_try_cmpxchg and this_cpu_try_cmpxchg()
x86/percpu: Define {raw,this}_cpu_try_cmpxchg{64,128}
x86/asm/bitops: Use __builtin_clz{l|ll} to evaluate constant expressions
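The try-cmpxchg conversions above all share one loop shape: on failure, the
primitive writes the value it actually observed back into the caller's 'old',
so the retry loop no longer needs an explicit re-read before the next attempt.
As a rough standalone sketch of that shape, here is a userspace analogue built
on GCC/Clang's __atomic_compare_exchange_n(), which has the same failure
semantics; count_set(), counter and NEED_RESCHED are illustrative stand-ins,
not kernel code:

/* Userspace sketch of the try_cmpxchg loop shape (not kernel code). */
#include <stdio.h>

#define NEED_RESCHED 0x80000000u        /* stand-in for PREEMPT_NEED_RESCHED */

static unsigned int counter;            /* stand-in for the per-CPU counter */

static void count_set(unsigned int pc)
{
        unsigned int old = __atomic_load_n(&counter, __ATOMIC_RELAXED);
        unsigned int new;

        do {
                /* preserve the flag bit, replace everything else */
                new = (old & NEED_RESCHED) | (pc & ~NEED_RESCHED);
                /* on failure, 'old' is refreshed with the observed value,
                 * so no explicit re-read is needed before retrying */
        } while (!__atomic_compare_exchange_n(&counter, &old, new,
                                              0, __ATOMIC_RELAXED,
                                              __ATOMIC_RELAXED));
}

int main(void)
{
        count_set(42);
        printf("counter = %u\n", counter);
        return 0;
}

This is the same reason the preempt_count_set() hunk below can hoist the
raw_cpu_read_4() out of the loop: cmpxchg already returns the current value
on failure, so re-reading it was redundant.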
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--   arch/x86/include/asm/bitops.h    |   9
-rw-r--r--   arch/x86/include/asm/percpu.h    | 110
-rw-r--r--   arch/x86/include/asm/preempt.h   |   4
3 files changed, 115 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2edf68475fec..50e5ebf9d0a0 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -293,6 +293,9 @@ static __always_inline unsigned long variable_ffz(unsigned long word)
  */
 static __always_inline unsigned long __fls(unsigned long word)
 {
+	if (__builtin_constant_p(word))
+		return BITS_PER_LONG - 1 - __builtin_clzl(word);
+
 	asm("bsr %1,%0"
 	    : "=r" (word)
 	    : "rm" (word));
@@ -360,6 +363,9 @@ static __always_inline int fls(unsigned int x)
 {
 	int r;
 
+	if (__builtin_constant_p(x))
+		return x ? 32 - __builtin_clz(x) : 0;
+
 #ifdef CONFIG_X86_64
 	/*
 	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
@@ -401,6 +407,9 @@ static __always_inline int fls(unsigned int x)
 static __always_inline int fls64(__u64 x)
 {
 	int bitpos = -1;
+
+	if (__builtin_constant_p(x))
+		return x ? 64 - __builtin_clzll(x) : 0;
 	/*
 	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
 	 * dest reg is undefined if x==0, but their CPU architect says its
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 34734d730463..20624b80f890 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,6 +210,25 @@ do { \
 	(typeof(_var))(unsigned long) pco_old__; \
 })
 
+#define percpu_try_cmpxchg_op(size, qual, _var, _ovalp, _nval) \
+({ \
+	bool success; \
+	__pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \
+	__pcpu_type_##size pco_old__ = *pco_oval__; \
+	__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+	asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
+				    __percpu_arg([var])) \
+		  CC_SET(z) \
+		  : CC_OUT(z) (success), \
+		    [oval] "+a" (pco_old__), \
+		    [var] "+m" (_var) \
+		  : [nval] __pcpu_reg_##size(, pco_new__) \
+		  : "memory"); \
+	if (unlikely(!success)) \
+		*pco_oval__ = pco_old__; \
+	likely(success); \
+})
+
 #if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
 #define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
 ({ \
@@ -223,26 +242,63 @@ do { \
 	old__.var = _oval; \
 	new__.var = _nval; \
 	\
-	asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+	asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
 			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
 		  : [var] "+m" (_var), \
 		    "+a" (old__.low), \
 		    "+d" (old__.high) \
 		  : "b" (new__.low), \
-		    "c" (new__.high) \
-		  : "memory", "esi"); \
+		    "c" (new__.high), \
+		    "S" (&(_var)) \
+		  : "memory"); \
 	\
 	old__.var; \
 })
 
 #define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
 #define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+
+#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \
+({ \
+	bool success; \
+	u64 *_oval = (u64 *)(_ovalp); \
+	union { \
+		u64 var; \
+		struct { \
+			u32 low, high; \
+		}; \
+	} old__, new__; \
+	\
+	old__.var = *_oval; \
+	new__.var = _nval; \
+	\
+	asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+		  CC_SET(z) \
+		  : CC_OUT(z) (success), \
+		    [var] "+m" (_var), \
+		    "+a" (old__.low), \
+		    "+d" (old__.high) \
+		  : "b" (new__.low), \
+		    "c" (new__.high), \
+		    "S" (&(_var)) \
+		  : "memory"); \
+	if (unlikely(!success)) \
+		*_oval = old__.var; \
+	likely(success); \
+})
+
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
 #endif
 
 #ifdef CONFIG_X86_64
 #define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
 #define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval);
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval);
+
 #define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval) \
 ({ \
 	union { \
@@ -255,20 +311,54 @@ do { \
 	old__.var = _oval; \
 	new__.var = _nval; \
 	\
-	asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+	asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
 			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
 		  : [var] "+m" (_var), \
 		    "+a" (old__.low), \
 		    "+d" (old__.high) \
 		  : "b" (new__.low), \
-		    "c" (new__.high) \
-		  : "memory", "rsi"); \
+		    "c" (new__.high), \
+		    "S" (&(_var)) \
+		  : "memory"); \
 	\
 	old__.var; \
 })
 
 #define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
 #define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+
+#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \
+({ \
+	bool success; \
+	u128 *_oval = (u128 *)(_ovalp); \
+	union { \
+		u128 var; \
+		struct { \
+			u64 low, high; \
+		}; \
+	} old__, new__; \
+	\
+	old__.var = *_oval; \
+	new__.var = _nval; \
+	\
+	asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+		  CC_SET(z) \
+		  : CC_OUT(z) (success), \
+		    [var] "+m" (_var), \
+		    "+a" (old__.low), \
+		    "+d" (old__.high) \
+		  : "b" (new__.low), \
+		    "c" (new__.high), \
+		    "S" (&(_var)) \
+		  : "memory"); \
+	if (unlikely(!success)) \
+		*_oval = old__.var; \
+	likely(success); \
+})
+
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
 #endif
 
 /*
@@ -343,6 +433,9 @@ do { \
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
 #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
 #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
 
 #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
 #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
@@ -350,6 +443,9 @@ do { \
 #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
 #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
 #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval)
 
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -364,6 +460,7 @@ do { \
 #define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
 #define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
 #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
 
 #define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
 #define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
@@ -373,6 +470,7 @@ do { \
 #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
 #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
 #endif
 
 static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 2d13f25b1bd8..4527e1430c6d 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -31,11 +31,11 @@ static __always_inline void preempt_count_set(int pc)
 {
 	int old, new;
 
+	old = raw_cpu_read_4(pcpu_hot.preempt_count);
 	do {
-		old = raw_cpu_read_4(pcpu_hot.preempt_count);
 		new = (old & PREEMPT_NEED_RESCHED) |
 			(pc & ~PREEMPT_NEED_RESCHED);
-	} while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old);
+	} while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new));
 }
 
 /*
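The bitops.h hunks above add a __builtin_constant_p() fast path so that calls
with compile-time-constant arguments fold to an immediate instead of emitting
BSR. A minimal standalone sketch of the same pattern follows; my_fls() is a
hypothetical name, not the kernel helper, and the asm path assumes x86 with
GCC or Clang:

#include <stdio.h>

/* Hypothetical fls()-style helper demonstrating the pattern. */
static inline int my_fls(unsigned int x)
{
        /* constant argument: the whole call can fold at compile time */
        if (__builtin_constant_p(x))
                return x ? 32 - __builtin_clz(x) : 0;

        /* runtime argument: BSR, as in the pre-existing asm path
         * (x == 0 is not handled here; the real fls() takes care of it) */
        asm("bsrl %1,%0" : "=r" (x) : "rm" (x));
        return x + 1;
}

int main(void)
{
        volatile unsigned int v = 1u << 20;     /* defeat constant folding */

        printf("my_fls(8) = %d\n", my_fls(8));  /* 4; folds under -O */
        printf("my_fls(v) = %d\n", my_fls(v));  /* BSR path, prints 21 */
        return 0;
}

Both paths agree on the result; the builtin branch simply lets constant
arguments be evaluated entirely by the compiler, which the asm version
prevents.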