ARC: atomics: implement relaxed variants

The current ARC fetch/return atomics provide fully ordered semantics only with 2 full barriers around the operation. Instead implement them as relaxed variants without any barriers and rely on generic code to generate the fully-ordered, acquire and release varaints by adding the appropriate full barriers. This helps elide some extra barriers in case of acquire/release/relaxed calls. bloat-o-meter for hsdk defconfig shows codegen improvements, although numbers below inflated due to unrelated inlining heuristic changes | bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed | add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154) | Function old new delta | .. | sys_renameat 462 476 +14 | ip_mc_inc_group 424 436 +12 | do_read_cache_page 1882 1894 +12 | .. | refcount_dec_and_mutex_lock 254 250 -4 | refcount_dec_and_lock_irqsave 258 254 -4 | refcount_dec_and_lock 254 250 -4 | .. | tcp_v6_route_req 246 238 -8 | tcp_v4_destroy_sock 286 278 -8 | tcp_twsk_unique 352 344 -8 Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Vineet Gupta <vgupta@kernel.org>
author: Vineet Gupta <vgupta@kernel.org> 2020-04-14 04:07:49 +0300
committer: Vineet Gupta <vgupta@kernel.org> 2021-08-25 00:25:47 +0300
commit: b64be6836993c431e54fad239fcba0543854ee35 (patch)
tree: 9d5fe48754db52199426e9b77551521a0aa5e150 /arch/arc/include/asm/atomic64-arcv2.h
parent: 7e8f8cbb43990861e5881594e14d491f81931f8d (diff)
download: linux-b64be6836993c431e54fad239fcba0543854ee35.tar.xz
1 files changed, 13 insertions, 11 deletions
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
index 22ef1cbb94e2..c5a8010fdc97 100644
--- a/arch/arc/include/asm/atomic64-arcv2.h
+++ b/arch/arc/include/asm/atomic64-arcv2.h
@@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
-static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)	\
+static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)	\
 {									\
 	s64 val;							\
 									\
-	smp_mb();							\
-									\
 	__asm__ __volatile__(						\
 	"1:				\n"				\
 	"	llockd   %0, [%1]	\n"				\
@@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)	\
 	: "r"(&v->counter), "ir"(a)					\
 	: "cc");	/* memory clobber comes from smp_mb() */	\
 									\
-	smp_mb();							\
-									\
 	return val;							\
 }
 
+#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+
 #define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
-static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)	\
+static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)	\
 {									\
 	s64 val, orig;							\
 									\
-	smp_mb();							\
-									\
 	__asm__ __volatile__(						\
 	"1:				\n"				\
 	"	llockd   %0, [%2]	\n"				\
@@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)	\
 	: "r"(&v->counter), "ir"(a)					\
 	: "cc");	/* memory clobber comes from smp_mb() */	\
 									\
-	smp_mb();							\
-									\
 	return orig;							\
 }
 
+#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed
+
+#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed
+
 #define ATOMIC64_OPS(op, op1, op2)					\
 	ATOMIC64_OP(op, op1, op2)					\
 	ATOMIC64_OP_RETURN(op, op1, op2)				\
@@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or)
 ATOMIC64_OPS(xor, xor, xor)
 
 #define arch_atomic64_andnot		arch_atomic64_andnot
-#define arch_atomic64_fetch_andnot	arch_atomic64_fetch_andnot
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
author	Vineet Gupta <vgupta@kernel.org>	2020-04-14 04:07:49 +0300
committer	Vineet Gupta <vgupta@kernel.org>	2021-08-25 00:25:47 +0300
commit	b64be6836993c431e54fad239fcba0543854ee35 (patch)
tree	9d5fe48754db52199426e9b77551521a0aa5e150 /arch/arc/include/asm/atomic64-arcv2.h
parent	7e8f8cbb43990861e5881594e14d491f81931f8d (diff)
download	linux-b64be6836993c431e54fad239fcba0543854ee35.tar.xz