From cf3ee3c8c29dc349b2cf52e5e72e8cb805ff5e57 Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 13 Jul 2021 11:52:53 +0100
Subject: locking/atomic: add generic arch_*() bitops

Now that all architectures provide arch_atomic_long_*(), we can build
the generic arch_*() bitops atop these, which can be safely used in
noinstr code. The regular bitop wrappers are built atop these.

As the generic non-atomic bitops use plain accesses, these will be
implicitly instrumented unless they are inlined into noinstr functions
(which is similar to arch_atomic*_read() when based on READ_ONCE()).
The wrappers are modified so that where the underlying arch_*()
function uses a plain access, no explicit instrumentation is added, as
this is redundant and could result in confusing reports.

Since function prototypes get excessively long with both an `arch_`
prefix and `__always_inline` attribute, the return type and function
attributes have been split onto a separate line, matching the style of
the generated atomic headers.

Signed-off-by: Mark Rutland
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lore.kernel.org/r/20210713105253.7615-6-mark.rutland@arm.com
---
 include/asm-generic/bitops/non-atomic.h | 41 ++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 7e10c4b50c5d..c8149cd52730 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -5,7 +5,7 @@
 #include <asm/types.h>
 
 /**
- * __set_bit - Set a bit in memory
+ * arch___set_bit - Set a bit in memory
  * @nr: the bit to set
  * @addr: the address to start counting from
  *
@@ -13,24 +13,28 @@
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static __always_inline void
+arch___set_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p |= mask;
 }
+#define arch___set_bit_uses_plain_access
 
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline void
+arch___clear_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p &= ~mask;
 }
+#define arch___clear_bit_uses_plain_access
 
 /**
- * __change_bit - Toggle a bit in memory
+ * arch___change_bit - Toggle a bit in memory
  * @nr: the bit to change
  * @addr: the address to start counting from
  *
@@ -38,16 +42,18 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
  * If it's called on the same region of memory simultaneously, the effect
  * may be that only one operation succeeds.
  */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static __always_inline
+void arch___change_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p ^= mask;
 }
+#define arch___change_bit_uses_plain_access
 
 /**
- * __test_and_set_bit - Set a bit and return its old value
+ * arch___test_and_set_bit - Set a bit and return its old value
  * @nr: Bit to set
  * @addr: Address to count from
  *
@@ -55,7 +61,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_set_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -64,9 +71,10 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
        *p = old | mask;
        return (old & mask) != 0;
 }
+#define arch___test_and_set_bit_uses_plain_access
 
 /**
- * __test_and_clear_bit - Clear a bit and return its old value
+ * arch___test_and_clear_bit - Clear a bit and return its old value
  * @nr: Bit to clear
  * @addr: Address to count from
  *
@@ -74,7 +82,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
  * If two examples of this operation race, one can appear to succeed
  * but actually fail.  You must protect multiple accesses with a lock.
  */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -83,10 +92,11 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
        *p = old & ~mask;
        return (old & mask) != 0;
 }
+#define arch___test_and_clear_bit_uses_plain_access
 
 /* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
-                                           volatile unsigned long *addr)
+static __always_inline int
+arch___test_and_change_bit(int nr, volatile unsigned long *addr)
 {
        unsigned long mask = BIT_MASK(nr);
        unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
@@ -95,15 +105,20 @@ static inline int __test_and_change_bit(int nr,
        *p = old ^ mask;
        return (old & mask) != 0;
 }
+#define arch___test_and_change_bit_uses_plain_access
 
 /**
- * test_bit - Determine whether a bit is set
+ * arch_test_bit - Determine whether a bit is set
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static inline int test_bit(int nr, const volatile unsigned long *addr)
+static __always_inline int
+arch_test_bit(int nr, const volatile unsigned long *addr)
 {
        return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
+#define arch_test_bit_uses_plain_access
+
+#include <asm-generic/bitops/instrumented-non-atomic.h>
 
 #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
--
cgit v1.2.3
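All of the ops above share one indexing scheme: BIT_MASK() and
BIT_WORD() split a bit number into an in-word mask and a word index.
The following stand-alone sketch re-derives that arithmetic outside the
kernel; the two macro definitions mirror include/linux/bits.h, but the
program itself is illustrative only, not kernel code:

        #include <assert.h>
        #include <limits.h>

        #define BITS_PER_LONG (CHAR_BIT * sizeof(long))
        #define BIT_MASK(nr)  (1UL << ((nr) % BITS_PER_LONG))
        #define BIT_WORD(nr)  ((nr) / BITS_PER_LONG)

        int main(void)
        {
                unsigned long bitmap[2] = { 0, 0 };
                unsigned int nr = 40;   /* word 0 on 64-bit, word 1 on 32-bit */

                /* the read-modify-write that arch___set_bit() performs */
                bitmap[BIT_WORD(nr)] |= BIT_MASK(nr);

                /* the plain load that arch_test_bit() performs */
                assert(1UL & (bitmap[BIT_WORD(nr)] >> (nr % BITS_PER_LONG)));
                return 0;
        }

Because every op is a plain load or store over this layout, the only
instrumentation they receive is whatever the compiler itself inserts,
which is exactly what makes the arch_*() forms usable from noinstr code.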
From 9248e52fec9536590852844b0634b5d20483c1ab Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Wed, 21 Jul 2021 16:58:13 +0100
Subject: locking/atomic: simplify non-atomic wrappers

Since the non-atomic arch_*() bitops use plain accesses, they are
implicitly instrumented by the compiler, and we work around this in the
instrumented wrappers to avoid double instrumentation.
It's simpler to avoid the wrappers entirely, and use the preprocessor
to alias the arch_*() bitops to their regular versions, removing the
need for checks in the instrumented wrappers.

Suggested-by: Marco Elver
Signed-off-by: Mark Rutland
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Marco Elver
Link: https://lore.kernel.org/r/20210721155813.17082-1-mark.rutland@arm.com
---
 .../asm-generic/bitops/instrumented-non-atomic.h | 21 +++++++--------------
 include/asm-generic/bitops/non-atomic.h          | 16 +++++++---------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h
index e6c1540965d6..37363d570b9b 100644
--- a/include/asm-generic/bitops/instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/instrumented-non-atomic.h
@@ -24,8 +24,7 @@
  */
 static inline void __set_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___set_bit_uses_plain_access))
-               instrument_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___set_bit(nr, addr);
 }
 
@@ -40,8 +39,7 @@ static inline void __set_bit(long nr, volatile unsigned long *addr)
  */
 static inline void __clear_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___clear_bit_uses_plain_access))
-               instrument_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___clear_bit(nr, addr);
 }
 
@@ -56,8 +54,7 @@ static inline void __clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline void __change_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___change_bit_uses_plain_access))
-               instrument_write(addr + BIT_WORD(nr), sizeof(long));
+       instrument_write(addr + BIT_WORD(nr), sizeof(long));
        arch___change_bit(nr, addr);
 }
 
@@ -95,8 +92,7 @@ static inline void __instrument_read_write_bitop(long nr, volatile unsigned long
  */
 static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___test_and_set_bit_uses_plain_access))
-               __instrument_read_write_bitop(nr, addr);
+       __instrument_read_write_bitop(nr, addr);
        return arch___test_and_set_bit(nr, addr);
 }
 
@@ -110,8 +106,7 @@ static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___test_and_clear_bit_uses_plain_access))
-               __instrument_read_write_bitop(nr, addr);
+       __instrument_read_write_bitop(nr, addr);
        return arch___test_and_clear_bit(nr, addr);
 }
 
@@ -125,8 +120,7 @@ static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
 {
-       if (!__is_defined(arch___test_and_change_bit_uses_plain_access))
-               __instrument_read_write_bitop(nr, addr);
+       __instrument_read_write_bitop(nr, addr);
        return arch___test_and_change_bit(nr, addr);
 }
 
@@ -137,8 +131,7 @@ static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
  */
 static inline bool test_bit(long nr, const volatile unsigned long *addr)
 {
-       if (!__is_defined(arch_test_bit_uses_plain_access))
-               instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
+       instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
        return arch_test_bit(nr, addr);
 }
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index c8149cd52730..365377fb104b 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -21,7 +21,7 @@ arch___set_bit(int nr, volatile unsigned long *addr)
 
        *p |= mask;
 }
-#define arch___set_bit_uses_plain_access
+#define __set_bit arch___set_bit
 
 static __always_inline void
 arch___clear_bit(int nr, volatile unsigned long *addr)
@@ -31,7 +31,7 @@ arch___clear_bit(int nr, volatile unsigned long *addr)
 
        *p &= ~mask;
 }
-#define arch___clear_bit_uses_plain_access
+#define __clear_bit arch___clear_bit
 
 /**
  * arch___change_bit - Toggle a bit in memory
@@ -50,7 +50,7 @@ void arch___change_bit(int nr, volatile unsigned long *addr)
 
        *p ^= mask;
 }
-#define arch___change_bit_uses_plain_access
+#define __change_bit arch___change_bit
 
 /**
  * arch___test_and_set_bit - Set a bit and return its old value
@@ -71,7 +71,7 @@ arch___test_and_set_bit(int nr, volatile unsigned long *addr)
        *p = old | mask;
        return (old & mask) != 0;
 }
-#define arch___test_and_set_bit_uses_plain_access
+#define __test_and_set_bit arch___test_and_set_bit
 
 /**
  * arch___test_and_clear_bit - Clear a bit and return its old value
@@ -92,7 +92,7 @@ arch___test_and_clear_bit(int nr, volatile unsigned long *addr)
        *p = old & ~mask;
        return (old & mask) != 0;
 }
-#define arch___test_and_clear_bit_uses_plain_access
+#define __test_and_clear_bit arch___test_and_clear_bit
 
 /* WARNING: non atomic and it can be reordered! */
 static __always_inline int
@@ -105,7 +105,7 @@ arch___test_and_change_bit(int nr, volatile unsigned long *addr)
        *p = old ^ mask;
        return (old & mask) != 0;
 }
-#define arch___test_and_change_bit_uses_plain_access
+#define __test_and_change_bit arch___test_and_change_bit
 
 /**
  * arch_test_bit - Determine whether a bit is set
@@ -117,8 +117,6 @@ arch_test_bit(int nr, const volatile unsigned long *addr)
 {
        return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
-#define arch_test_bit_uses_plain_access
-
-#include <asm-generic/bitops/instrumented-non-atomic.h>
+#define test_bit arch_test_bit
 
 #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
--
cgit v1.2.3
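The effect of the aliasing is easiest to see in the generic case, where
no architecture override exists. Below is a stand-alone sketch of the
resulting pattern; the names mirror the kernel's, but the code is an
illustration, not the kernel implementation:

        #include <stdbool.h>

        #define BITS_PER_LONG (8 * sizeof(long))
        #define BIT_WORD(nr)  ((nr) / BITS_PER_LONG)

        /* The arch_*() form is a plain access with no explicit
         * instrumentation call compiled in, so it can be inlined into
         * noinstr code; elsewhere, KASAN/KCSAN instrument the plain
         * load themselves. */
        static inline bool arch_test_bit(unsigned int nr,
                                         const volatile unsigned long *addr)
        {
                return 1UL & (addr[BIT_WORD(nr)] >> (nr % BITS_PER_LONG));
        }

        /* The simplification: the regular name is a bare alias, so
         * there is no wrapper left to add a second, explicit
         * instrumentation of the same access. */
        #define test_bit arch_test_bit

Architectures that provide their own (uninstrumented) arch_*() bitops
instead include instrumented-non-atomic.h, whose wrappers now
instrument unconditionally, as the hunks above show.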
*/ -static inline void __set_bit(int nr, volatile unsigned long *addr) +static inline void __set_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -21,7 +21,7 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) *p |= mask; } -static inline void __clear_bit(int nr, volatile unsigned long *addr) +static inline void __clear_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -38,7 +38,7 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) * If it's called on the same region of memory simultaneously, the effect * may be that only one operation succeeds. */ -static inline void __change_bit(int nr, volatile unsigned long *addr) +static inline void __change_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -55,7 +55,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +static inline int __test_and_set_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -74,7 +74,7 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +static inline int __test_and_clear_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -85,7 +85,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) } /* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, +static inline int __test_and_change_bit(unsigned int nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); @@ -101,7 +101,7 @@ static inline int __test_and_change_bit(int nr, * @nr: bit number to test * @addr: Address to start counting from */ -static inline int test_bit(int nr, const volatile unsigned long *addr) +static inline int test_bit(unsigned int nr, const volatile unsigned long *addr) { return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } -- cgit v1.2.3