1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
From 6587a1cae28468f5a49659a39040f60e425827a7 Mon Sep 17 00:00:00 2001
From: Mahesh Bodapati <mbodapat@xilinx.com>
Date: Tue, 17 Nov 2020 13:06:41 +0530
Subject: [PATCH 09/11] [Patch,MicroBlaze] : Added MB-64 support to
 strcmp/strcpy/strlen files

Signed-off-by: Mahesh Bodapati <mbodapat@xilinx.com>
---
newlib/libc/machine/microblaze/strcmp.c | 63 ++++++++++++++++++++++++-
newlib/libc/machine/microblaze/strcpy.c | 57 ++++++++++++++++++++++
newlib/libc/machine/microblaze/strlen.c | 38 +++++++++++++++
3 files changed, 157 insertions(+), 1 deletion(-)
diff --git a/newlib/libc/machine/microblaze/strcmp.c b/newlib/libc/machine/microblaze/strcmp.c
index 3119d82c5..2cfef7388 100644
--- a/newlib/libc/machine/microblaze/strcmp.c
+++ b/newlib/libc/machine/microblaze/strcmp.c
@@ -133,6 +133,66 @@ strcmp (const char *s1,
#include "mb_endian.h"
+#ifdef __arch64__
+ asm volatile (" \n\
+ orl r9, r0, r0 /* Index register */ \n\
+check_alignment: \n\
+ andli r3, r5, 3 \n\
+ andli r4, r6, 3 \n\
+ beanei r3, try_align_args \n\
+ beanei r4, regular_strcmp /* At this point we don't have a choice */ \n\
+cmp_loop: \n"
+ LOAD4BYTES("r3", "r5", "r9")
+ LOAD4BYTES("r4", "r6", "r9")
+" \n\
+ pcmplbf r7, r3, r0 /* See if there is Null byte */ \n\
+ beanei r7, end_cmp_loop /* IF yes (r7 > 0) use byte compares in end_cmp_loop */ \n\
+ cmplu r7, r4, r3 /* ELSE compare whole word */ \n\
+ beanei r7, end_cmp \n\
+ addlik r9, r9, 4 /* advance index to next word */ \n\
+ breaid cmp_loop \n\
+ nop /* delay slot */ \n\
+end_cmp_loop: \n\
+ lbu r3, r5, r9 /* byte compare loop */ \n\
+ lbu r4, r6, r9 \n\
+ cmplu r7, r4, r3 /* Compare bytes */ \n\
+ beanei r7, end_cmp_early \n\
+ addlik r9, r9, 1 /* advance index to next byte */ \n\
+ beaneid r3, end_cmp_loop /* If reached null on one string, terminate */ \n\
+ nop \n\
+end_cmp_early: \n\
+ orl r3, r0, r7 /* Return strcmp result */ \n\
+ rtsd r15, 8 \n\
+ nop \n\
+try_align_args: \n\
+ xorl r7, r4, r3 \n\
+ beanei r7, regular_strcmp /* cannot align args */ \n\
+ rsublik r10, r3, 4 /* Number of initial bytes to align */ \n\
+align_loop: \n\
+ lbu r3, r5, r9 \n\
+ lbu r4, r6, r9 \n\
+ cmplu r7, r4, r3 \n\
+ beanei r7, end_cmp \n\
+ beaeqi r3, end_cmp \n\
+ addlik r10, r10, -1 \n\
+ addlik r9, r9, 1 \n\
+ beaeqid r10, cmp_loop \n\
+ nop \n\
+ breai align_loop \n\
+regular_strcmp: \n\
+ lbu r3, r5, r9 \n\
+ lbu r4, r6, r9 \n\
+ cmplu r7, r4, r3 \n\
+ beanei r7, end_cmp \n\
+ beaeqi r3, end_cmp \n\
+ addlik r9, r9, 1 \n\
+ breaid regular_strcmp \n\
+ nop \n\
+end_cmp: \n\
+ orl r3, r0, r7 \n\
+ rtsd r15, 8 \n\
+ nop /* Return strcmp result */");
+#else
asm volatile (" \n\
or r9, r0, r0 /* Index register */\n\
check_alignment: \n\
@@ -181,11 +241,12 @@ regular_strcmp:
bnei r7, end_cmp \n\
beqi r3, end_cmp \n\
brid regular_strcmp \n\
- addik r9, r9, 1 \n\
+ addik r9, r9, 1 \n\
end_cmp: \n\
rtsd r15, 8 \n\
or r3, r0, r7 /* Return strcmp result */");
+#endif
#endif /* ! HAVE_HW_PCMP */
}
diff --git a/newlib/libc/machine/microblaze/strcpy.c b/newlib/libc/machine/microblaze/strcpy.c
index 62072fa28..6dbc60d77 100644
--- a/newlib/libc/machine/microblaze/strcpy.c
+++ b/newlib/libc/machine/microblaze/strcpy.c
@@ -125,6 +125,62 @@ strcpy (char *__restrict dst0,
#else
#include "mb_endian.h"
+#ifdef __arch64__
+
+ asm volatile (" \n\
+ orl r9, r0, r0 /* Index register */ \n\
+check_alignment: \n\
+ andli r3, r5, 3 \n\
+ andli r4, r6, 3 \n\
+ beanei r3, try_align_args \n\
+ beanei r4, regular_strcpy /* At this point we dont have a choice */ \n\
+cpy_loop: \n"
+ LOAD4BYTES("r3", "r6", "r9")
+" \n\
+ pcmplbf r4, r0, r3 \n\
+ beanei r4, cpy_bytes /* If r4 != 0, then null present within string */\n"
+ STORE4BYTES("r3", "r5", "r9")
+" \n\
+ addlik r9, r9, 4 \n\
+ breaid cpy_loop \n\
+ nop \n\
+cpy_bytes: \n\
+ lbu r3, r6, r9 \n\
+ sb r3, r5, r9 \n\
+ addlik r4, r4, -1 \n\
+ addlik r9, r9, 1 /* advance index to next byte */\n\
+ beaneid r4, cpy_bytes \n\
+ nop \n\
+cpy_null: \n\
+ orl r3, r0, r5 /* Return strcpy result */\n\
+ rtsd r15, 8 \n\
+ nop \n\
+try_align_args: \n\
+ xorl r7, r4, r3 \n\
+ beanei r7, regular_strcpy /* cannot align args */\n\
+ rsublik r10, r3, 4 /* Number of initial bytes to align */\n\
+align_loop: \n\
+ lbu r3, r6, r9 \n\
+ sb r3, r5, r9 \n\
+ addlik r10, r10, -1 \n\
+ beaeqid r3, end_cpy /* Break if we have seen null character */\n\
+ nop \n\
+ addlik r9, r9, 1 \n\
+ beaneid r10, align_loop \n\
+ nop \n\
+ breai cpy_loop \n\
+regular_strcpy: \n\
+ lbu r3, r6, r9 \n\
+ sb r3, r5, r9 \n\
+ addlik r9, r9, 1 \n\
+ beaneid r3, regular_strcpy \n\
+ nop \n\
+end_cpy: \n\
+ orl r3, r0, r5 \n\
+ rtsd r15, 8 \n\
+ nop /* Return strcpy result */");
+
+#else
asm volatile (" \n\
or r9, r0, r0 /* Index register */ \n\
@@ -171,6 +227,7 @@ regular_strcpy: \n\
end_cpy: \n\
rtsd r15, 8 \n\
or r3, r0, r5 /* Return strcpy result */");
+#endif
#endif /* ! HAVE_HW_PCMP */
}
diff --git a/newlib/libc/machine/microblaze/strlen.c b/newlib/libc/machine/microblaze/strlen.c
index acb4464bc..b6f2d3c13 100644
--- a/newlib/libc/machine/microblaze/strlen.c
+++ b/newlib/libc/machine/microblaze/strlen.c
@@ -116,6 +116,43 @@ strlen (const char *str)
#include "mb_endian.h"
+#ifdef __arch64__
+ asm volatile (" \n\
+ orl r9, r0, r0 /* Index register */ \n\
+check_alignment: \n\
+ andli r3, r5, 3 \n\
+ beanei r3, align_arg \n\
+len_loop: \n"
+ LOAD4BYTES("r3", "r5", "r9")
+" \n\
+ pcmplbf r4, r3, r0 \n\
+ beanei r4, end_len \n\
+ addlik r9, r9, 4 \n\
+ breaid len_loop \n\
+ nop \n\
+end_len: \n\
+ lbu r3, r5, r9 \n\
+ beaeqi r3, done_len \n\
+ addlik r9, r9, 1 \n\
+ breaid end_len \n\
+ nop \n\
+done_len: \n\
+ orl r3, r0, r9 /* Return len */ \n\
+ rtsd r15, 8 \n\
+ nop \n\
+align_arg: \n\
+ rsublik r10, r3, 4 \n\
+align_loop: \n\
+ lbu r3, r5, r9 \n\
+ addlik r10, r10, -1 \n\
+ beaeqid r3, done_len \n\
+ nop \n\
+ addlik r9, r9, 1 \n\
+ beaneid r10, align_loop \n\
+ nop \n\
+ breai len_loop");
+
+#else
asm volatile (" \n\
or r9, r0, r0 /* Index register */ \n\
check_alignment: \n\
@@ -146,5 +183,6 @@ align_loop: \n\
addik r9, r9, 1 \n\
bri len_loop");
+#endif
#endif /* ! HAVE_HW_PCMP */
}
--
2.17.1
|