summaryrefslogtreecommitdiff
path: root/arch/ppc64/lib/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ppc64/lib/memcpy.S')
-rw-r--r--arch/ppc64/lib/memcpy.S172
1 files changed, 0 insertions, 172 deletions
diff --git a/arch/ppc64/lib/memcpy.S b/arch/ppc64/lib/memcpy.S
deleted file mode 100644
index 9ccacdf5bcb9..000000000000
--- a/arch/ppc64/lib/memcpy.S
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * arch/ppc64/lib/memcpy.S
- *
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-
-/*
- * memcpy: copy r5 bytes from the address in r4 to the address in r3.
- *
- * Register use on the entry / both-aligned path:
- *   r3   destination pointer (biased and advanced as data is stored)
- *   r4   source pointer (biased and advanced as data is loaded)
- *   r5   byte count; reduced to (count & 7) once the loop is set up
- *   r6   number of bytes needed to bring r3 to an 8-byte boundary
- *   cr7  low four bits of the byte count (bit 0 = 8, 1 = 4, 2 = 2, 3 = 1)
- *   cr1  unsigned comparison of the byte count against 16
- *
- * NOTE(review): r3 is advanced while copying and is not restored before
- * blr.  If callers expect the original destination pointer back in r3,
- * this routine does not provide it - confirm against the callers.
- */
- .align 7
-_GLOBAL(memcpy)
- mtcrf 0x01,r5 # cr7 = low 4 bits of the byte count
- cmpldi cr1,r5,16 # cr1 = count vs 16 (unsigned)
- neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
- andi. r6,r6,7 # cr0.eq set when dest is already 8-byte aligned
- dcbt 0,r4 # cache-touch hint for the start of the source
- blt cr1,.Lshort_copy # fewer than 16 bytes
- bne .Ldst_unaligned
-.Ldst_aligned:
- andi. r0,r4,7 # r0 = source offset within its doubleword
- addi r3,r3,-16 # bias dest so the loop can use 8/16 displacements
- bne .Lsrc_unaligned
- srdi r7,r5,4 # r7 = number of 16-byte loop iterations
- ld r9,0(r4) # first source doubleword
- addi r4,r4,-8 # bias source to match the loop displacements
- mtctr r7
- andi. r5,r5,7 # r5 = trailing bytes; cr0.eq when there are none
- bf cr7*4+0,2f # count bit 8 clear: even number of doublewords
- addi r3,r3,8
- addi r4,r4,8
- mr r8,r9
- blt cr1,3f # under 16 bytes left: only this one doubleword
-1: ld r9,8(r4) # loop body: 16 bytes per iteration, with each
- std r8,8(r3) # load issued ahead of the store that uses it
-2: ldu r8,16(r4)
- stdu r9,16(r3)
- bdnz 1b
-3: std r8,8(r3)
- beqlr # no trailing bytes (cr0 from the andi. above)
- addi r3,r3,16
- ld r9,8(r4) # doubleword that holds the trailing bytes
-/*
- * Store up to 7 trailing bytes.  They are taken from the most-significant
- * end of r9; cr7 bits 1..3 select a 4-, 2- and 1-byte piece in turn.
- */
-.Ldo_tail:
- bf cr7*4+1,1f # count bit 4 clear: no word piece
- rotldi r9,r9,32 # bring the top word of r9 into the low word
- stw r9,0(r3)
- addi r3,r3,4
-1: bf cr7*4+2,2f # count bit 2 clear: no halfword piece
- rotldi r9,r9,16
- sth r9,0(r3)
- addi r3,r3,2
-2: bf cr7*4+3,3f # count bit 1 clear: no byte piece
- rotldi r9,r9,8
- stb r9,0(r3)
-3: blr
-
-/*
- * Destination is 8-byte aligned but the source is not (r0 = source & 7).
- * The source is read as aligned doublewords and every output doubleword
- * is merged from two neighbours: (sN << r10) | (sN+1 >> r11).
- * r3 arrives here already biased by -16 from .Ldst_aligned.
- */
-.Lsrc_unaligned:
- srdi r6,r5,3 # r6 = count / 8 (whole doublewords)
- addi r5,r5,-16
- subf r4,r0,r4 # round source down to an 8-byte boundary
- srdi r7,r5,4 # loop iterations = (count - 16) / 16
- sldi r10,r0,3 # r10 = left shift in bits (8 * source offset)
- cmpdi cr6,r6,3 # cr6 = doubleword count vs 3
- andi. r5,r5,7 # r5 = trailing bytes; cr0.eq when there are none
- mtctr r7
- subfic r11,r10,64 # r11 = complementary right shift (64 - r10)
- add r5,r5,r0 # trailing bytes + source offset, tested after 5:
-
- bt cr7*4+0,0f # count bit 8 set: odd number of doublewords
-
- ld r9,0(r4) # 3+2n loads, 2+2n stores
- ld r0,8(r4)
- sld r6,r9,r10
- ldu r9,16(r4)
- srd r7,r0,r11
- sld r8,r0,r10
- or r7,r7,r6
- blt cr6,4f # fewer than 3 doublewords: skip the loop
- ld r0,8(r4)
- # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
- b 2f
-
-0: ld r0,0(r4) # 4+2n loads, 3+2n stores
- ldu r9,8(r4)
- sld r8,r0,r10
- addi r3,r3,-8
- blt cr6,5f # fewer than 3 doublewords: one store only
- ld r0,8(r4)
- srd r12,r9,r11
- sld r6,r9,r10
- ldu r9,16(r4)
- or r12,r8,r12
- srd r7,r0,r11
- sld r8,r0,r10
- addi r3,r3,16
- beq cr6,3f # exactly 3 doublewords: skip the loop
-
- # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
-1: or r7,r7,r6
- ld r0,8(r4)
- std r12,8(r3)
-2: srd r12,r9,r11
- sld r6,r9,r10
- ldu r9,16(r4)
- or r12,r8,r12
- stdu r7,16(r3)
- srd r7,r0,r11
- sld r8,r0,r10
- bdnz 1b
-
-3: std r12,8(r3)
- or r7,r7,r6
-4: std r7,16(r3)
-5: srd r12,r9,r11
- or r12,r8,r12
- std r12,24(r3) # last whole doubleword
- beqlr # no trailing bytes (cr0 from the andi. above)
- cmpwi cr1,r5,8 # do the trailing bytes extend past r9?
- addi r3,r3,32 # step r3 past the doublewords already stored
- sld r9,r9,r10 # move the unused source bytes to the top of r9
- ble cr1,.Ldo_tail # r9 already holds every trailing byte
- ld r0,8(r4) # otherwise merge in one more doubleword
- srd r7,r0,r11
- or r9,r7,r9
- b .Ldo_tail
-
-/*
- * Destination is not 8-byte aligned: copy the r6 leading bytes needed to
- * reach an 8-byte boundary, then continue at .Ldst_aligned with the rest.
- * cr7 is loaded from r6 to select a 1-, 2- and 4-byte piece; r7 is the
- * running byte offset used by the indexed loads and stores.
- */
-.Ldst_unaligned:
- mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7
- subf r5,r6,r5 # r5 = bytes left after the alignment copy
- li r7,0
- cmpldi cr1,r5,16 # cr1 = remaining count vs 16, used by .Ldst_aligned
- bf cr7*4+3,1f # bit 1 clear: no byte piece
- lbz r0,0(r4)
- stb r0,0(r3)
- addi r7,r7,1
-1: bf cr7*4+2,2f # bit 2 clear: no halfword piece
- lhzx r0,r7,r4
- sthx r0,r7,r3
- addi r7,r7,2
-2: bf cr7*4+1,3f # bit 4 clear: no word piece
- lwzx r0,r7,r4
- stwx r0,r7,r3
-3: mtcrf 0x01,r5 # cr7 = low 4 bits of the remaining count
- add r4,r6,r4 # advance both pointers past the bytes just copied
- add r3,r6,r3
- b .Ldst_aligned
-
-/*
- * Fewer than 16 bytes to copy.  cr7 holds the low four bits of the count,
- * so copy one 8-, 4-, 2- and 1-byte piece as each bit dictates, advancing
- * r3 and r4 after every piece except the last.
- */
-.Lshort_copy:
- bf cr7*4+0,1f # count bit 8 clear: skip the 8-byte piece
- lwz r0,0(r4) # 8 bytes are moved as two words
- lwz r9,4(r4)
- addi r4,r4,8
- stw r0,0(r3)
- stw r9,4(r3)
- addi r3,r3,8
-1: bf cr7*4+1,2f # count bit 4 clear: skip the word
- lwz r0,0(r4)
- addi r4,r4,4
- stw r0,0(r3)
- addi r3,r3,4
-2: bf cr7*4+2,3f # count bit 2 clear: skip the halfword
- lhz r0,0(r4)
- addi r4,r4,2
- sth r0,0(r3)
- addi r3,r3,2
-3: bf cr7*4+3,4f # count bit 1 clear: skip the final byte
- lbz r0,0(r4)
- stb r0,0(r3)
-4: blr