summaryrefslogtreecommitdiff
path: root/arch/mips/lib/memcpy.S
diff options
context:
space:
mode:
authorThomas Bogendoerfer <tsbogend@alpha.franken.de>2007-11-25 13:47:56 +0300
committerRalf Baechle <ralf@linux-mips.org>2008-01-29 13:14:58 +0300
commit930bff882296c02ca81db108672ef4ca06c37db5 (patch)
tree53288137d4f7cc02d8ca417edb2b25221c3007cd /arch/mips/lib/memcpy.S
parent2064ba23e58daa929eec6f5e7a2abc24574a95b9 (diff)
downloadlinux-930bff882296c02ca81db108672ef4ca06c37db5.tar.xz
[MIPS] IP28: added cache barrier to assembly routines
IP28 needs special treatment to avoid speculative accesses. gcc takes care for .c code, but for assembly code we need to do it manually. This is taken from Peter Fuersts IP28 patches. Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r--arch/mips/lib/memcpy.S10
1 files changed, 10 insertions, 0 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index aded7b159052..01e450b1ebc9 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -199,6 +199,7 @@ FEXPORT(__copy_user)
*/
#define rem t8
+ R10KCBARRIER(0(ra))
/*
* The "issue break"s below are very approximate.
* Issue delays for dcache fills will perturb the schedule, as will
@@ -231,6 +232,7 @@ both_aligned:
PREF( 1, 3*32(dst) )
.align 4
1:
+ R10KCBARRIER(0(ra))
EXC( LOAD t0, UNIT(0)(src), l_exc)
EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
@@ -272,6 +274,7 @@ EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
SUB len, len, 4*NBYTES
ADD src, src, 4*NBYTES
+ R10KCBARRIER(0(ra))
EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
@@ -287,6 +290,7 @@ less_than_4units:
beq rem, len, copy_bytes
nop
1:
+ R10KCBARRIER(0(ra))
EXC( LOAD t0, 0(src), l_exc)
ADD src, src, NBYTES
SUB len, len, NBYTES
@@ -334,6 +338,7 @@ EXC( LDFIRST t3, FIRST(0)(src), l_exc)
EXC( LDREST t3, REST(0)(src), l_exc_copy)
SUB t2, t2, t1 # t2 = number of bytes copied
xor match, t0, t1
+ R10KCBARRIER(0(ra))
EXC( STFIRST t3, FIRST(0)(dst), s_exc)
beq len, t2, done
SUB len, len, t2
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned:
* It's OK to load FIRST(N+1) before REST(N) because the two addresses
* are to the same unit (unless src is aligned, but it's not).
*/
+ R10KCBARRIER(0(ra))
EXC( LDFIRST t0, FIRST(0)(src), l_exc)
EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
SUB len, len, 4*NBYTES
@@ -384,6 +390,7 @@ cleanup_src_unaligned:
beq rem, len, copy_bytes
nop
1:
+ R10KCBARRIER(0(ra))
EXC( LDFIRST t0, FIRST(0)(src), l_exc)
EXC( LDREST t0, REST(0)(src), l_exc_copy)
ADD src, src, NBYTES
@@ -399,6 +406,7 @@ copy_bytes_checklen:
nop
copy_bytes:
/* 0 < len < NBYTES */
+ R10KCBARRIER(0(ra))
#define COPY_BYTE(N) \
EXC( lb t0, N(src), l_exc); \
SUB len, len, 1; \
@@ -528,6 +536,7 @@ LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
ADD a1, a2 # src = src + len
r_end_bytes:
+ R10KCBARRIER(0(ra))
lb t0, -1(a1)
SUB a2, a2, 0x1
sb t0, -1(a0)
@@ -542,6 +551,7 @@ r_out:
move a2, zero
r_end_bytes_up:
+ R10KCBARRIER(0(ra))
lb t0, (a1)
SUB a2, a2, 0x1
sb t0, (a0)