summaryrefslogtreecommitdiff
path: root/arch/arc/lib/strcpy-700.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:58:56 +0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:58:56 +0400
commite23b62256a361611cbd45cd1456638f1a5106b5c (patch)
tree472968c961432a1d9d0c3634ca20433f1d9cd29b /arch/arc/lib/strcpy-700.S
parentaebb2afd5420c860b7fbc3882a323ef1247fbf16 (diff)
parent8ccfe6675fa974bd06d64f74d0fdee6a5267d2aa (diff)
downloadlinux-e23b62256a361611cbd45cd1456638f1a5106b5c.tar.xz
Merge tag 'arc-v3.9-rc1-late' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
Pull new ARC architecture from Vineet Gupta: "Initial ARC Linux port with some fixes on top for 3.9-rc1: I would like to introduce the Linux port to ARC Processors (from Synopsys) for 3.9-rc1. The patch-set has been discussed on the public lists since Nov and has received a fair bit of review, specially from Arnd, tglx, Al and other subsystem maintainers for DeviceTree, kgdb... The arch bits are in arch/arc, some asm-generic changes (acked by Arnd), a minor change to PARISC (acked by Helge). The series is a touch bigger for a new port for 2 main reasons: 1. It enables a basic kernel in first sub-series and adds ptrace/kgdb/.. later 2. Some of the fallout of review (DeviceTree support, multi-platform- image support) were added on top of orig series, primarily to record the revision history. This updated pull request additionally contains - fixes due to our GNU tools catching up with the new syscall/ptrace ABI - some (minor) cross-arch Kconfig updates." * tag 'arc-v3.9-rc1-late' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (82 commits) ARC: split elf.h into uapi and export it for userspace ARC: Fixup the current ABI version ARC: gdbserver using regset interface possibly broken ARC: Kconfig cleanup tracking cross-arch Kconfig pruning in merge window ARC: make a copy of flat DT ARC: [plat-arcfpga] DT arc-uart bindings change: "baud" => "current-speed" ARC: Ensure CONFIG_VIRT_TO_BUS is not enabled ARC: Fix pt_orig_r8 access ARC: [3.9] Fallout of hlist iterator update ARC: 64bit RTSC timestamp hardware issue ARC: Don't fiddle with non-existent caches ARC: Add self to MAINTAINERS ARC: Provide a default serial.h for uart drivers needing BASE_BAUD ARC: [plat-arcfpga] defconfig for fully loaded ARC Linux ARC: [Review] Multi-platform image #8: platform registers SMP callbacks ARC: [Review] Multi-platform image #7: SMP common code to use callbacks ARC: [Review] Multi-platform image #6: cpu-to-dma-addr optional ARC: [Review] Multi-platform image #5: NR_IRQS defined by ARC core ARC: [Review] Multi-platform image #4: Isolate platform headers ARC: [Review] Multi-platform image #3: switch to board callback ...
Diffstat (limited to 'arch/arc/lib/strcpy-700.S')
-rw-r--r--arch/arc/lib/strcpy-700.S70
1 files changed, 70 insertions, 0 deletions
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 000000000000..b7ca4ae81d88
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ it 8 byte aligned. Thus, we can do a little read-ahead, without
+ dereferencing a cache line that we should not touch.
+ Note that short and long instructions have been scheduled to avoid
+ branch stalls.
+ The beq_s to r3z could be made unaligned & long to avoid a stall
+ there, but the it is not likely to be taken often, and it
+ would also be likey to cost an unaligned mispredict at the next call. */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcpy
+ or r2,r0,r1
+ bmsk_s r2,r2,1
+ brne.d r2,0,charloop
+ mov_s r10,r0
+ ld_s r3,[r1,0]
+ mov r8,0x01010101
+ bbit0.d r1,2,loop_start
+ ror r12,r8
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne r3z
+ mov_s r4,r3
+ .balign 4
+loop:
+ ld.a r3,[r1,4]
+ st.ab r4,[r10,4]
+loop_start:
+ ld.a r4,[r1,4]
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne_s r3z
+ st.ab r3,[r10,4]
+ sub r2,r4,r8
+ bic r2,r2,r4
+ tst r2,r12
+ beq loop
+ mov_s r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z: bmsk.f r1,r3,7
+ lsr_s r3,r3,8
+#else
+r3z: lsr.f r1,r3,24
+ asl_s r3,r3,8
+#endif
+ bne.d r3z
+ stb.ab r1,[r10,1]
+ j_s [blink]
+
+ .balign 4
+charloop:
+ ldb.ab r3,[r1,1]
+
+
+ brne.d r3,0,charloop
+ stb.ab r3,[r10,1]
+ j [blink]
+ARC_EXIT strcpy