summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
author: Heiko Carstens <heiko.carstens@de.ibm.com> 2016-12-07 15:45:38 +0300
committer: Martin Schwidefsky <schwidefsky@de.ibm.com> 2016-12-12 14:11:32 +0300
commit: b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3 (patch)
tree: 23f0a9923f2f77cafc9ac097b1c9dd350ec0a55b /arch/s390
parent: 82897ede9235d31c50074ce1da81828aa2f3d70c (diff)
download: linux-b4623d4e5b2370fcf1200cbf832aaa53f6e96ef3.tar.xz
s390: provide memmove implementation
Provide an s390 specific memmove implementation which is faster than
the generic implementation, which copies byte-wise.

For non-destructive (as defined by the mvc instruction) memmove
operations the following table compares the old default implementation
versus the new s390 specific implementation:

  size      old     new
     1      1ns     8ns
     2      2ns     8ns
     4      4ns     8ns
     8      7ns     8ns
    16     17ns     8ns
    32     35ns     8ns
    64     65ns     9ns
   128    146ns    10ns
   256    298ns    11ns
   512    537ns    11ns
  1024   1193ns    19ns
  2048   2405ns    36ns

So the old implementation is faster only for very small sizes. For
overlapping memmoves, where the mvc instruction can't be used, the new
implementation is as slow as the old one.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390')
-rw-r--r-- arch/s390/include/asm/string.h | 3
-rw-r--r-- arch/s390/lib/mem.S | 39
2 files changed, 41 insertions, 1 deletions
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 8662f5c8e17f..15a3c005c274 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -14,6 +14,7 @@
#define __HAVE_ARCH_MEMCHR /* inline & arch function */
#define __HAVE_ARCH_MEMCMP /* arch function */
#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
@@ -32,6 +33,7 @@
extern int memcmp(const void *, const void *, size_t);
extern void *memcpy(void *, const void *, size_t);
extern void *memset(void *, int, size_t);
+extern void *memmove(void *, const void *, size_t);
extern int strcmp(const char *,const char *);
extern size_t strlcat(char *, const char *, size_t);
extern size_t strlcpy(char *, const char *, size_t);
@@ -40,7 +42,6 @@ extern char *strncpy(char *, const char *, size_t);
extern char *strrchr(const char *, int);
extern char *strstr(const char *, const char *);
-#undef __HAVE_ARCH_MEMMOVE
#undef __HAVE_ARCH_STRCHR
#undef __HAVE_ARCH_STRNCHR
#undef __HAVE_ARCH_STRNCMP
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index be9fa65bfac4..7422a706f310 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -8,6 +8,45 @@
#include <asm/export.h>
/*
+ * void *memmove(void *dest, const void *src, size_t n)
+ */
+ENTRY(memmove) /* s390 calling convention: %r2 = dest, %r3 = src, %r4 = n; returns dest in %r2 */
+ ltgr %r4,%r4 /* set the condition code from n */
+ lgr %r1,%r2 /* %r1 = working dest pointer; %r2 is left untouched as the return value */
+ bzr %r14 /* n == 0: nothing to copy (lgr does not change the condition code) */
+ clgr %r2,%r3 /* unsigned compare of dest with src */
+ jnh .Lmemmove_forward /* dest <= src: a forward copy is safe */
+ la %r5,0(%r4,%r3) /* %r5 = src + n */
+ clgr %r2,%r5 /* unsigned compare of dest with src + n */
+ jl .Lmemmove_reverse /* src < dest < src + n: regions overlap, copy backwards */
+.Lmemmove_forward: /* forward copy using mvc in 256-byte chunks */
+ aghi %r4,-1 /* %r4 = n - 1 */
+ srlg %r0,%r4,8 /* %r0 = (n - 1) / 256 = number of full 256-byte chunks */
+ ltgr %r0,%r0 /* any full chunks? */
+ jz .Lmemmove_rest /* no: only the tail remains */
+.Lmemmove_loop:
+ mvc 0(256,%r1),0(%r3) /* copy one 256-byte chunk */
+ la %r1,256(%r1) /* advance dest pointer */
+ la %r3,256(%r3) /* advance src pointer */
+ brctg %r0,.Lmemmove_loop /* decrement chunk count, loop while non-zero */
+.Lmemmove_rest:
+ larl %r5,.Lmemmove_mvc /* %r5 = address of the mvc template below */
+ ex %r4,0(%r5) /* run the template with its length code taken from the low byte of %r4: copies ((n - 1) % 256) + 1 bytes */
+ br %r14 /* return */
+.Lmemmove_reverse: /* overlapping copy, one byte at a time from the end; reached only when n >= 2 */
+ aghi %r4,-1 /* %r4 = n - 1 = index of the last byte */
+.Lmemmove_reverse_loop:
+ ic %r0,0(%r4,%r3) /* load byte src[%r4] */
+ stc %r0,0(%r4,%r1) /* store it to dest[%r4] */
+ brctg %r4,.Lmemmove_reverse_loop /* decrement index, loop while non-zero */
+ ic %r0,0(%r4,%r3) /* index is now 0: load the first byte */
+ stc %r0,0(%r4,%r1) /* store the first byte */
+ br %r14 /* return */
+.Lmemmove_mvc: /* template for ex above; not reached by fall-through */
+ mvc 0(1,%r1),0(%r3) /* length code 0 (one byte) before ex supplies the real length */
+EXPORT_SYMBOL(memmove)
+
+/*
* memset implementation
*
* This code corresponds to the C construct below. We do distinguish