1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
From 711c7bc5a07b62e8369bc76a9db265c960bacef8 Mon Sep 17 00:00:00 2001
From: Vernon Mauery <vernon.mauery@linux.intel.com>
Date: Mon, 19 Nov 2018 11:04:02 -0800
Subject: [PATCH] Rewrite memmove to optimize on word transfers
Reading from the flash at boot time was using byte-sized transfers,
which ultimately turns into four word transfers over spi for every real
word read. This change breaks memmove down into a header, body, and
trailer, where the body is all done with word-sized transfers.
Change-Id: Ie0a1f3261e507fb34a908571883d9bf04a1059ee
Signed-off-by: Vernon Mauery <vernon.mauery@linux.intel.com>
---
lib/string.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 64 insertions(+), 13 deletions(-)
diff --git a/lib/string.c b/lib/string.c
index 67d5f6a..0bf472f 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -505,26 +505,77 @@ void * memcpy(void *dest, const void *src, size_t count)
*
* Unlike memcpy(), memmove() copes with overlapping areas.
*/
-void * memmove(void * dest,const void *src,size_t count)
+void *memmove(void *dest, const void *src, size_t count)
{
- char *tmp, *s;
-
- if (src == dest)
+ unsigned char *bdst = (unsigned char *)dest;
+ const unsigned char *bsrc = (const unsigned char *)src;
+ unsigned long *ldst;
+ const unsigned long *lsrc;
+ size_t unaligned_header = 0, unaligned_trailer = 0;
+ size_t unaligned_src =
+ (sizeof(*ldst) - (size_t)src) & (sizeof(*ldst) - 1);
+ size_t unaligned_dst =
+ (sizeof(*ldst) - (size_t)dest) & (sizeof(*ldst) - 1);
+
+ if (src == dest || !count)
return dest;
+ if (unaligned_src || unaligned_dst) {
+ if (unaligned_dst != unaligned_src) {
+ unaligned_header = count;
+ } else {
+ unaligned_header = unaligned_src;
+ if (unaligned_header > count) {
+ unaligned_header = count;
+ }
+ }
+ count -= unaligned_header;
+ }
+ if (count & (sizeof(*ldst) - 1)) {
+ unaligned_trailer = count & (sizeof(*ldst) - 1);
+ count -= unaligned_trailer;
+ }
+
if (dest <= src) {
- tmp = (char *) dest;
- s = (char *) src;
- while (count--)
- *tmp++ = *s++;
+ /* possible un-aligned bytes */
+ while (unaligned_header--)
+ *bdst++ = *bsrc++;
+
+ /* aligned words */
+ ldst = (unsigned long *)bdst;
+ lsrc = (const unsigned long *)bsrc;
+ while (count >= sizeof(*ldst)) {
+ count -= sizeof(*ldst);
+ *ldst++ = *lsrc++;
}
- else {
- tmp = (char *) dest + count;
- s = (char *) src + count;
- while (count--)
- *--tmp = *--s;
+
+ /* possibly un-aligned bytes */
+ bdst = (unsigned char *)ldst;
+ bsrc = (const unsigned char *)lsrc;
+ while (unaligned_trailer--)
+ *bdst++ = *bsrc++;
+ } else {
+ bdst += unaligned_header + count + unaligned_trailer;
+ bsrc += unaligned_header + count + unaligned_trailer;
+
+ /* possibly un-aligned bytes */
+ while (unaligned_trailer--)
+ *--bdst = *--bsrc;
+
+ /* aligned words */
+ ldst = (unsigned long *)bdst;
+ lsrc = (unsigned long *)bsrc;
+ while (count >= sizeof(*ldst)) {
+ count -= sizeof(*ldst);
+ *--ldst = *--lsrc;
}
+ /* possibly un-aligned bytes */
+ bdst = (unsigned char *)ldst;
+ bsrc = (const unsigned char *)lsrc;
+ while (unaligned_header--)
+ *--bdst = *--bsrc;
+ }
return dest;
}
#endif
|