From b2c0ab17ba751abe13a28508b1ac7e9ca074cd87 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Tue, 6 Sep 2005 13:05:58 +1000
Subject: [PATCH] ppc64: speedup cmpxchg

cmpxchg has the following code:

__typeof__(*(ptr)) _o_ = (o);
__typeof__(*(ptr)) _n_ = (n);

Unfortunately this makes gcc 4.0 store the variables to the stack and then
load them back. E.g. in atomic_dec_and_test we get:

  stw     r10,112(r1)
  stw     r9,116(r1)
  lwz     r9,112(r1)
  lwz     r0,116(r1)

x86 just casts the values instead, so do the same here. Also change
__xchg* and __cmpxchg* to take unsigned values, removing a few sign
extensions.
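
For comparison, a minimal standalone sketch of the two macro styles; it
can be compiled with gcc -O2 -S to inspect the generated code. Note that
cmpxchg_dummy() is a hypothetical stand-in for the real __cmpxchg()
primitive:

  /* hypothetical placeholder for the real lwarx/stwcx. based primitive */
  static inline unsigned long cmpxchg_dummy(volatile void *ptr,
					    unsigned long old,
					    unsigned long new,
					    unsigned int size)
  {
	return old;
  }

  /* old form: typeof temporaries, which gcc 4.0 spills to the stack */
  #define cmpxchg_old(ptr,o,n)						\
    ({									\
	__typeof__(*(ptr)) _o_ = (o);					\
	__typeof__(*(ptr)) _n_ = (n);					\
	(__typeof__(*(ptr))) cmpxchg_dummy((ptr), (unsigned long)_o_,	\
					(unsigned long)_n_,		\
					sizeof(*(ptr)));		\
    })

  /* new form: plain casts, as x86 does */
  #define cmpxchg_new(ptr,o,n)						\
	((__typeof__(*(ptr)))cmpxchg_dummy((ptr), (unsigned long)(o),	\
					(unsigned long)(n),		\
					sizeof(*(ptr))))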

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 include/asm-ppc64/system.h | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index b9e1835351e9..c0396428cc3c 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -158,7 +158,7 @@ static inline int __is_processor(unsigned long pv)
  * is more like most of the other architectures.
  */
 static __inline__ unsigned long
-__xchg_u32(volatile int *m, unsigned long val)
+__xchg_u32(volatile unsigned int *m, unsigned long val)
 {
 	unsigned long dummy;
 
@@ -200,7 +200,7 @@ __xchg_u64(volatile long *m, unsigned long val)
 extern void __xchg_called_with_bad_pointer(void);
 
 static __inline__ unsigned long
-__xchg(volatile void *ptr, unsigned long x, int size)
+__xchg(volatile void *ptr, unsigned long x, unsigned int size)
 {
 	switch (size) {
 	case 4:
@@ -223,7 +223,7 @@ __xchg(volatile void *ptr, unsigned long x, int size)
 #define __HAVE_ARCH_CMPXCHG	1
 
 static __inline__ unsigned long
-__cmpxchg_u32(volatile int *p, int old, int new)
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
 {
 	unsigned int prev;
 
@@ -271,7 +271,8 @@ __cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new)
 extern void __cmpxchg_called_with_bad_pointer(void);
 
 static __inline__ unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
 {
 	switch (size) {
 	case 4:
@@ -283,13 +284,9 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 	return old;
 }
 
-#define cmpxchg(ptr,o,n)						 \
-  ({									 \
-     __typeof__(*(ptr)) _o_ = (o);					 \
-     __typeof__(*(ptr)) _n_ = (n);					 \
-     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
-				    (unsigned long)_n_, sizeof(*(ptr))); \
-  })
+#define cmpxchg(ptr,o,n)\
+	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+	(unsigned long)(n),sizeof(*(ptr))))
 
 /*
  * We handle most unaligned accesses in hardware. On the other hand 
-- 