summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/sparc/lib/memmove.S35
1 files changed, 32 insertions, 3 deletions
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index b7f6334e159f..857ad4f8905f 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
- mov %o0, %g1
+ brz,pn %o2, 99f
+ mov %o0, %g1
+
cmp %o0, %o1
- bleu,pt %xcc, memcpy
+ bleu,pt %xcc, 2f
add %o1, %o2, %g7
cmp %g7, %o0
bleu,pt %xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
-
+99:
retl
mov %g1, %o0
+
+ /* We can't just call memcpy for these memmove cases. On some
+ * chips the memcpy uses cache initializing stores and when dst
+ * and src are close enough, those can clobber the source data
+ * before we've loaded it in.
+ */
+2: or %o0, %o1, %g7
+ or %o2, %g7, %g7
+ andcc %g7, 0x7, %g0
+ bne,pn %xcc, 4f
+ nop
+
+3: ldx [%o1], %g7
+ add %o1, 8, %o1
+ subcc %o2, 8, %o2
+ add %o0, 8, %o0
+ bne,pt %icc, 3b
+ stx %g7, [%o0 - 0x8]
+ ba,a,pt %xcc, 99b
+
+4: ldub [%o1], %g7
+ add %o1, 1, %o1
+ subcc %o2, 1, %o2
+ add %o0, 1, %o0
+ bne,pt %icc, 4b
+ stb %g7, [%o0 - 0x1]
+ ba,a,pt %xcc, 99b
ENDPROC(memmove)