Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* memcpy.S: Sparc optimized memcpy and memmove code |
2 | * Hand optimized from GNU libc's memcpy and memmove | |
3 | * Copyright (C) 1991,1996 Free Software Foundation | |
4 | * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) | |
5 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | |
6 | * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) | |
7 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | |
8 | */ | |
9 | ||
d3867f04 | 10 | #include <asm/export.h> |
/* FUNC(x): open the definition of function x.  Expands to a .globl for
 * the symbol, a .type marking it as a function, a 4-byte .align, and
 * finally the label "x:" itself.  The function body follows the macro
 * invocation.
 */
045b7de9 | 11 | #define FUNC(x) \
1da177e4 LT |
12 | .globl x; \
13 | .type x,@function; \ | |
045b7de9 | 14 | .align 4; \
1da177e4 LT |
15 | x:
16 | ||
1da177e4 LT |
17 | /* Both these macros have to start with exactly the same insn */ | |
/* MOVE_BIGCHUNK(src, dst, offset, t0..t7): copy the 32 bytes at
 * [%src + offset] to [%dst + offset] without advancing either pointer.
 * The four ldd's fill the register pairs t0/t1, t2/t3, t4/t5 and t6/t7;
 * the data is written back with eight word-sized st's, so this is the
 * variant memcpy falls into when bit 2 of dst is set (dst is not
 * doubleword aligned).  Expands to 12 instructions.
 * The first instruction has to be identical to the first instruction of
 * MOVE_BIGALIGNCHUNK because memcpy executes it in the delay slot of
 * "be 82f + 4" and then skips the copy of it at label 82.
 */
18 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | |
19 | ldd [%src + (offset) + 0x00], %t0; \ | |
20 | ldd [%src + (offset) + 0x08], %t2; \ | |
21 | ldd [%src + (offset) + 0x10], %t4; \ | |
22 | ldd [%src + (offset) + 0x18], %t6; \ | |
23 | st %t0, [%dst + (offset) + 0x00]; \ | |
24 | st %t1, [%dst + (offset) + 0x04]; \ | |
25 | st %t2, [%dst + (offset) + 0x08]; \ | |
26 | st %t3, [%dst + (offset) + 0x0c]; \ | |
27 | st %t4, [%dst + (offset) + 0x10]; \ | |
28 | st %t5, [%dst + (offset) + 0x14]; \ | |
29 | st %t6, [%dst + (offset) + 0x18]; \ | |
30 | st %t7, [%dst + (offset) + 0x1c]; | |
31 | ||
/* MOVE_BIGALIGNCHUNK(src, dst, offset, t0..t7): copy the 32 bytes at
 * [%src + offset] to [%dst + offset] without advancing either pointer,
 * using doubleword accesses in both directions (four ldd's into the
 * pairs t0/t1 .. t6/t7, then four std's).  Used by the ldd_std loop at
 * label 82.  Expands to 8 instructions; its first instruction must stay
 * identical to the first instruction of MOVE_BIGCHUNK.
 */
32 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | |
33 | ldd [%src + (offset) + 0x00], %t0; \ | |
34 | ldd [%src + (offset) + 0x08], %t2; \ | |
35 | ldd [%src + (offset) + 0x10], %t4; \ | |
36 | ldd [%src + (offset) + 0x18], %t6; \ | |
37 | std %t0, [%dst + (offset) + 0x00]; \ | |
38 | std %t2, [%dst + (offset) + 0x08]; \ | |
39 | std %t4, [%dst + (offset) + 0x10]; \ | |
40 | std %t6, [%dst + (offset) + 0x18]; | |
41 | ||
/* MOVE_LASTCHUNK(src, dst, offset, t0..t3): copy the 16 bytes that end
 * at %src - offset, i.e. [%src - offset - 16, %src - offset), to the
 * same position relative to %dst.  Loads are two ldd's, stores are four
 * word-sized st's.
 * Expands to exactly 6 instructions (24 bytes of code).  The computed
 * jump into the table at label 79 steps back 1.5 code bytes per data
 * byte on that assumption, so the instruction count must not change.
 */
42 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
43 | ldd [%src - (offset) - 0x10], %t0; \ | |
44 | ldd [%src - (offset) - 0x08], %t2; \ | |
45 | st %t0, [%dst - (offset) - 0x10]; \ | |
46 | st %t1, [%dst - (offset) - 0x0c]; \ | |
47 | st %t2, [%dst - (offset) - 0x08]; \ | |
48 | st %t3, [%dst - (offset) - 0x04]; | |
49 | ||
/* MOVE_LASTALIGNCHUNK(src, dst, offset, t0..t3): doubleword-store
 * variant of MOVE_LASTCHUNK.  Copies the 16 bytes ending at
 * %src - offset to the same position relative to %dst with two ldd's
 * and two std's.
 * Expands to exactly 4 instructions (16 bytes of code); the computed
 * jump into the table at label 83 steps back one code byte per data
 * byte on that assumption, so the instruction count must not change.
 */
50 | #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
51 | ldd [%src - (offset) - 0x10], %t0; \ | |
52 | ldd [%src - (offset) - 0x08], %t2; \ | |
53 | std %t0, [%dst - (offset) - 0x10]; \ | |
54 | std %t2, [%dst - (offset) - 0x08]; | |
55 | ||
/* MOVE_SHORTCHUNK(src, dst, offset, t0, t1): copy the 2 bytes that end
 * at %src - offset to the same position relative to %dst, one byte at a
 * time (no alignment requirement).
 * Expands to exactly 4 instructions (16 bytes of code); the computed
 * jump in short_end (label 88) steps back 8 code bytes per data byte on
 * that assumption, so the instruction count must not change.
 */
56 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ | |
57 | ldub [%src - (offset) - 0x02], %t0; \ | |
58 | ldub [%src - (offset) - 0x01], %t1; \ | |
59 | stb %t0, [%dst - (offset) - 0x02]; \ | |
60 | stb %t1, [%dst - (offset) - 0x01]; | |
61 | ||
1da177e4 LT |
62 | .text |
63 | .align 4 | |
64 | ||
/* memmove: copy %o2 bytes from %o1 (src) to %o0 (dst); the regions may
 * overlap.  Returns the original dst in %o0 (kept in %g7 meanwhile).
 *
 * Two cases are handed to the memcpy body, which copies forwards:
 *   - dst <= src (unsigned)       -> enters memcpy at label 9
 *   - src + len <= dst (unsigned) -> enters memcpy at label 0
 * Each branch's delay slot performs the instruction memcpy itself would
 * have executed just before that label, so memcpy sees the state it
 * expects (%o4 = dst - src, condition codes from (dst - src) & 3).
 *
 * Otherwise dst lies inside (src, src + len) and the copy is done one
 * byte at a time from the last byte down to the first.
 */
1da177e4 | 65 | FUNC(memmove)
d3867f04 | 66 | EXPORT_SYMBOL(memmove)
1da177e4 | 67 | cmp %o0, %o1
/* Remember dst: it is the return value on every exit path. */
a52312b8 | 68 | mov %o0, %g7
1da177e4 LT |
/* dst <= src: a forward copy is safe.  The delay slot computes
 * %o4 = dst - src, as memcpy's own first instruction does.
 */
69 | bleu 9f
70 | sub %o0, %o1, %o4 | |
71 | ||
/* %o3 = src + len.  If that is <= dst the regions do not overlap, so
 * forward copy as well.  The delay slot duplicates the andcc found at
 * memcpy's label 9, which label 0 skips.
 */
72 | add %o1, %o2, %o3 | |
73 | cmp %o3, %o0 | |
74 | bleu 0f | |
75 | andcc %o4, 3, %o5 | |
76 | ||
1da177e4 LT |
/* Overlapping with dst above src: point both registers at the last
 * byte of their region.  len is non-zero here, because src < dst and
 * src + len > dst.
 */
77 | add %o1, %o2, %o1
78 | add %o0, %o2, %o0 | |
79 | sub %o1, 1, %o1 | |
80 | sub %o0, 1, %o0 | |
81 | ||
82 | 1: /* reverse_bytes */ | |
83 | ||
/* Copy one byte and step both pointers down.  subcc sets the condition
 * codes tested by bne; the plain sub that follows leaves them alone.
 * The dst decrement sits in the branch delay slot.
 */
84 | ldub [%o1], %o4 | |
85 | subcc %o2, 1, %o2 | |
86 | stb %o4, [%o0] | |
87 | sub %o1, 1, %o1 | |
88 | bne 1b | |
89 | sub %o0, 1, %o0 | |
90 | ||
/* Return the original dst (restored in the retl delay slot). */
91 | retl | |
a52312b8 | 92 | mov %g7, %o0
1da177e4 | 93 | |
1da177e4 LT |
94 | /* NOTE: This code is executed just for the cases,
95 | where %src (=%o1) & 3 is != 0. | |
96 | We need to align it to 4. So, for (%src & 3) | |
97 | 1 we need to do ldub,lduh | |
98 | 2 lduh | |
99 | 3 just ldub | |
100 | so even if it looks weird, the branches | |
101 | are correct here. -jj | |
102 | */ | |
/* dword_align: out-of-line helper of memcpy, entered through
 * "bne 78b".  It copies 1, 2 or 3 leading bytes so that %o1 becomes
 * word aligned, reduces %o2 by the same amount, and continues at
 * memcpy's label 3.  On this path memcpy has already established that
 * (dst - src) & 3 == 0, so %o0 becomes word aligned at the same time.
 */
103 | 78: /* dword_align */ | |
104 | ||
/* Bit 0 of src clear means src & 3 == 2: only the halfword copy at 4
 * is needed.  The delay slot tests bit 1 of the still unmodified src
 * for the bne below.
 */
105 | andcc %o1, 1, %g0 | |
106 | be 4f | |
107 | andcc %o1, 2, %g0 | |
108 | ||
/* src is odd: copy one byte.  add and sub do not touch the condition
 * codes, so bne still sees bit 1 of the original src: if it was set
 * (src & 3 == 3) the pointers are aligned now, otherwise
 * (src & 3 == 1) fall through and copy a halfword as well.
 */
109 | ldub [%o1], %g2 | |
110 | add %o1, 1, %o1 | |
111 | stb %g2, [%o0] | |
112 | sub %o2, 1, %o2 | |
113 | bne 3f | |
114 | add %o0, 1, %o0 | |
115 | 4: | |
/* Copy one halfword, then rejoin memcpy at label 3 (the dst update
 * executes in the delay slot of the branch).
 */
116 | lduh [%o1], %g2 | |
117 | add %o1, 2, %o1 | |
118 | sth %g2, [%o0] | |
119 | sub %o2, 2, %o2 | |
120 | b 3f | |
121 | add %o0, 2, %o0 | |
122 | ||
/* memcpy: copy %o2 bytes from %o1 (src) to %o0 (dst), forwards.
 * Returns the original dst in %o0; it is parked in %g7 until return.
 *
 * Dispatch done by this entry section:
 *   (dst - src) & 3 != 0  -> non_aligned       (label 86)
 *   len <= 15             -> short_aligned_end (label 90)
 *   src & 3 != 0          -> dword_align       (label 78), back at 3
 * After that src is brought to doubleword alignment and the data is
 * moved in 128-byte blocks: by the loop at 5 (word stores) when bit 2
 * of dst is set, or by the ldd_std loop at 82 (doubleword stores) when
 * it is clear.  The remaining < 128 bytes are handled after each loop.
 *
 * memmove enters at labels 9 and 0 with %g7 and %o4 already set up.
 */
1da177e4 | 123 | FUNC(memcpy) /* %o0=dst %o1=src %o2=len */
d3867f04 | 124 | EXPORT_SYMBOL(memcpy)
1da177e4 LT |
125 | |
/* %o4 = dst - src; its low two bits tell whether the two pointers can
 * be word aligned together.
 */
126 | sub %o0, %o1, %o4 | |
a52312b8 | 127 | mov %o0, %g7
1da177e4 LT |
128 | 9:
129 | andcc %o4, 3, %o5 | |
130 | 0: | |
/* Different alignment modulo 4 -> non_aligned.  The delay slot already
 * compares len with 15 for the next branch.
 */
131 | bne 86f | |
132 | cmp %o2, 15 | |
133 | ||
/* len <= 15 -> short_aligned_end.  The delay slot tests src & 3; that
 * result is used by the bne below and by the first branch at label 90.
 */
134 | bleu 90f | |
135 | andcc %o1, 3, %g0 | |
136 | ||
/* src not word aligned -> dword_align, which comes back to label 3.
 * Label 3 sits on this branch's delay slot instruction.
 */
137 | bne 78b | |
138 | 3: | |
/* Is bit 2 of src set, i.e. is src word aligned but not doubleword
 * aligned?
 */
139 | andcc %o1, 4, %g0 | |
140 | ||
/* From here on the remaining length lives in %g1: the chunk macros
 * below use %o2 (and %o3-%o5, %g2-%g5) as data registers.
 */
141 | be 2f | |
142 | mov %o2, %g1 | |
143 | ||
/* Copy one word so that src becomes doubleword aligned for ldd. */
144 | ld [%o1], %o4 | |
145 | sub %g1, 4, %g1 | |
146 | st %o4, [%o0] | |
147 | add %o1, 4, %o1 | |
148 | add %o0, 4, %o0 | |
149 | 2: | |
/* Any bits at or above bit 7 set, i.e. at least one 128-byte block
 * left?  If not, skip the block loop.  The delay slot tests bit 2 of
 * dst for the branch that follows.
 */
21f74d36 | 150 | andcc %g1, 0xffffff80, %g0
1da177e4 LT |
151 | be 3f
152 | andcc %o0, 4, %g0 | |
153 | ||
/* Bit 2 of dst clear: use the std based loop at 82.  The delay slot is
 * the first ldd of the MOVE_BIGCHUNK below, which is identical to the
 * first instruction at 82, hence the "+ 4" to skip that duplicate.
 */
154 | be 82f + 4 | |
155 | 5: | |
/* 128 bytes per iteration: ldd loads, word-sized stores. */
156 | MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
157 | MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
158 | MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
159 | MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
/* Loop while at least 128 bytes remain; dst is advanced in the delay
 * slot.  NOTE(review): bge is a signed comparison, so this relies on
 * the remaining length being below 2^31 - confirm that callers
 * guarantee it.
 */
21f74d36 | 160 | sub %g1, 128, %g1
1da177e4 | 161 | add %o1, 128, %o1
21f74d36 DM |
162 | cmp %g1, 128
163 | bge 5b | |
1da177e4 LT |
164 | add %o0, 128, %o0
/* Tail of the word-store path: copy the remaining %g1 & 0x7f bytes.
 * Reached when fewer than 128 bytes are left (from "be 3f" or by
 * falling out of the loop at 5).  The multiples of 16 are copied by
 * jumping into the middle of the unrolled memcpy_table; bits 3..0 of
 * %g1 are then handled one power of two at a time.
 */
165 | 3: | |
/* %g4 = number of bytes in whole 16-byte chunks (0..112).  None ->
 * straight to 80.  The delay slot tests bit 3 of the length; nothing
 * before label 80 modifies the condition codes again.
 */
21f74d36 | 166 | andcc %g1, 0x70, %g4
1da177e4 LT |
167 | be 80f
168 | andcc %g1, 8, %g0 | |
169 | ||
/* Computed jump: every table entry copies 16 bytes with 6 instructions
 * (24 bytes of code), so the target is 80 - (%g4 + %g4 / 2).  src and
 * dst are advanced past the chunked area first (dst in the jmpl delay
 * slot); the table entries address backwards from those pointers.
 */
170 | sethi %hi(80f), %o5 | |
21f74d36 DM |
171 | srl %g4, 1, %o4
172 | add %g4, %o4, %o4 | |
173 | add %o1, %g4, %o1 | |
1da177e4 LT |
174 | sub %o5, %o4, %o5
175 | jmpl %o5 + %lo(80f), %g0 | |
21f74d36 | 176 | add %o0, %g4, %o0
1da177e4 LT |
177 | |
178 | 79: /* memcpy_table */ | |
179 | ||
/* Entered somewhere in the middle; execution runs through to label 80.
 * %g4 is reused as a data register here, its byte count is no longer
 * needed.
 */
180 | MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | |
181 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | |
182 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | |
183 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | |
184 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | |
185 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | |
186 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
187 | ||
188 | 80: /* memcpy_table_end */ | |
/* Bit 3 of the length clear -> no 8-byte piece.  Each delay slot from
 * here on tests the next lower bit for the following branch.
 */
189 | be 81f | |
190 | andcc %g1, 4, %g0 | |
191 | ||
/* Copy 8 bytes: one ldd, two word stores. */
192 | ldd [%o1], %g2 | |
193 | add %o0, 8, %o0 | |
194 | st %g2, [%o0 - 0x08] | |
195 | add %o1, 8, %o1 | |
196 | st %g3, [%o0 - 0x04] | |
197 | ||
/* memcpy_last7 is also the join point for short_aligned_end (label 90),
 * which arrives with %g1 = len and the bit 2 test already performed.
 */
198 | 81: /* memcpy_last7 */ | |
199 | ||
200 | be 1f | |
201 | andcc %g1, 2, %g0 | |
202 | ||
/* Bit 2 set: copy one word. */
203 | ld [%o1], %g2 | |
204 | add %o1, 4, %o1 | |
205 | st %g2, [%o0] | |
206 | add %o0, 4, %o0 | |
207 | 1: | |
208 | be 1f | |
209 | andcc %g1, 1, %g0 | |
210 | ||
/* Bit 1 set: copy one halfword. */
211 | lduh [%o1], %g2 | |
212 | add %o1, 2, %o1 | |
213 | sth %g2, [%o0] | |
214 | add %o0, 2, %o0 | |
215 | 1: | |
216 | be 1f | |
217 | nop | |
218 | ||
/* Bit 0 set: copy the final byte. */
219 | ldub [%o1], %g2 | |
220 | stb %g2, [%o0] | |
221 | 1: | |
/* Return the original dst (restored in the retl delay slot). */
222 | retl | |
a52312b8 | 223 | mov %g7, %o0
1da177e4 LT |
224 | |
/* ldd_std: 128-byte block loop and tail for the case where bit 2 of
 * dst is clear, so doubleword stores (std) are used.  The first entry
 * comes from memcpy's "be 82f + 4": the ldd at 82 itself is skipped
 * because an identical ldd was executed in that branch's delay slot.
 * Loop iterations branch back to 82 proper.  %g1 holds the remaining
 * length, %g7 the value to return.
 */
225 | 82: /* ldd_std */
226 | MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
227 | MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
228 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
229 | MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
/* Loop while at least 128 bytes remain; dst is advanced in the delay
 * slot.  The condition codes set by subcc are overwritten by the cmp
 * before anything reads them.
 */
21f74d36 | 230 | subcc %g1, 128, %g1
1da177e4 | 231 | add %o1, 128, %o1
21f74d36 DM |
232 | cmp %g1, 128
233 | bge 82b | |
1da177e4 LT |
234 | add %o0, 128, %o0
235 | ||
/* %g4 = number of bytes in whole 16-byte chunks (0..112).  None ->
 * straight to 84.  The delay slot tests bit 3 of the length for the
 * branch at 84.
 */
21f74d36 | 236 | andcc %g1, 0x70, %g4
1da177e4 LT |
237 | be 84f
238 | andcc %g1, 8, %g0 | |
239 | ||
/* Computed jump: every table entry copies 16 bytes with 4 instructions
 * (16 bytes of code), so the target is simply 84 - %g4.  src and dst
 * are advanced past the chunked area first (dst in the jmpl delay
 * slot).
 */
240 | sethi %hi(84f), %o5 | |
21f74d36 DM |
241 | add %o1, %g4, %o1
242 | sub %o5, %g4, %o5 | |
1da177e4 | 243 | jmpl %o5 + %lo(84f), %g0
21f74d36 | 244 | add %o0, %g4, %o0
1da177e4 LT |
245 | |
246 | 83: /* amemcpy_table */ | |
247 | ||
/* Entered somewhere in the middle; execution runs through to 84. */
248 | MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | |
249 | MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | |
250 | MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | |
251 | MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | |
252 | MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | |
253 | MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | |
254 | MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
255 | ||
256 | 84: /* amemcpy_table_end */ | |
/* Bit 3 of the length clear -> no 8-byte piece.  Each delay slot from
 * here on tests the next lower bit for the following branch.
 */
257 | be 85f | |
258 | andcc %g1, 4, %g0 | |
259 | ||
/* Copy 8 bytes with one ldd and one std. */
260 | ldd [%o1], %g2 | |
261 | add %o0, 8, %o0 | |
262 | std %g2, [%o0 - 0x08] | |
263 | add %o1, 8, %o1 | |
264 | 85: /* amemcpy_last7 */ | |
265 | be 1f | |
266 | andcc %g1, 2, %g0 | |
267 | ||
/* Bit 2 set: copy one word. */
268 | ld [%o1], %g2 | |
269 | add %o1, 4, %o1 | |
270 | st %g2, [%o0] | |
271 | add %o0, 4, %o0 | |
272 | 1: | |
273 | be 1f | |
274 | andcc %g1, 1, %g0 | |
275 | ||
/* Bit 1 set: copy one halfword. */
276 | lduh [%o1], %g2 | |
277 | add %o1, 2, %o1 | |
278 | sth %g2, [%o0] | |
279 | add %o0, 2, %o0 | |
280 | 1: | |
281 | be 1f | |
282 | nop | |
283 | ||
/* Bit 0 set: copy the final byte. */
284 | ldub [%o1], %g2 | |
285 | stb %g2, [%o0] | |
286 | 1: | |
/* Return the original dst (restored in the retl delay slot). */
287 | retl | |
a52312b8 | 288 | mov %g7, %o0
1da177e4 | 289 | |
1da177e4 LT |
/* non_aligned: reached from memcpy's "bne 86f" when (dst - src) & 3 is
 * non-zero, so src and dst can never be word aligned at the same time.
 *
 * len <= 6 is handed to short_end (label 88).  Otherwise a new register
 * window is opened (dst/src/len become %i0/%i1/%i2), dst is brought to
 * word alignment with byte copies, and the bulk is copied with aligned
 * word loads from src that are shifted and merged into aligned word
 * stores to dst:
 *   %g4 = 8 * (src & 3)   left shift applied to the earlier word
 *   %l0 = 32 - %g4        right shift applied to the later word
 *   %g3 = word counter
 * Because src is misaligned relative to the now aligned dst, src & 3 is
 * non-zero at label 61, so both shift counts are 8, 16 or 24.
 * The loop at 5/7/8/9 is unrolled four times and entered at the label
 * selected by len & 0xc; label 10 stores the last word and copies the
 * final len & 3 bytes.
 */
290 | 86: /* non_aligned */
291 | cmp %o2, 6 | |
292 | bleu 88f | |
21f74d36 DM |
293 | nop
294 | ||
/* New register window: the caller's %o0-%o2 are %i0-%i2 from here on.
 * dst already word aligned -> 61.
 */
295 | save %sp, -96, %sp | |
296 | andcc %i0, 3, %g0 | |
1da177e4 | 297 | be 61f
21f74d36 | 298 | andcc %i0, 1, %g0
/* Bit 0 of dst clear (dst & 3 == 2) -> copy two bytes at 60.  The delay
 * slot tests bit 1 of the still unmodified dst for the bne below.
 */
1da177e4 | 299 | be 60f
21f74d36 | 300 | andcc %i0, 2, %g0
1da177e4 | 301 | |
21f74d36 DM |
/* dst is odd: copy one byte.  If bit 1 of the original dst was set
 * (dst & 3 == 3) dst is aligned now; otherwise fall through and copy
 * two more bytes.  add and sub leave the condition codes untouched.
 */
302 | ldub [%i1], %g5
303 | add %i1, 1, %i1 | |
304 | stb %g5, [%i0] | |
305 | sub %i2, 1, %i2 | |
1da177e4 | 306 | bne 61f
21f74d36 | 307 | add %i0, 1, %i0
1da177e4 | 308 | 60:
21f74d36 DM |
/* Copy two bytes one at a time (src may be odd here). */
309 | ldub [%i1], %g3
310 | add %i1, 2, %i1 | |
311 | stb %g3, [%i0] | |
312 | sub %i2, 2, %i2 | |
313 | ldub [%i1 - 1], %g3 | |
314 | add %i0, 2, %i0 | |
315 | stb %g3, [%i0 - 1] | |
1da177e4 | 316 | 61:
21f74d36 DM |
/* dst is word aligned.  Split src into its byte offset (%g2) and the
 * containing aligned word (%i1), derive the two shift counts, and
 * dispatch on len & 0xc (%g3) to pick the loop entry point.
 */
317 | and %i1, 3, %g2
318 | and %i2, 0xc, %g3 | |
319 | and %i1, -4, %i1 | |
1da177e4 LT |
320 | cmp %g3, 4
321 | sll %g2, 3, %g4 | |
322 | mov 32, %g2 | |
/* len & 0xc == 4 -> label 4.  %l0 is computed in the delay slot. */
323 | be 4f | |
21f74d36 | 324 | sub %g2, %g4, %l0
1da177e4 LT |
325 | |
/* len & 0xc == 0 -> label 3 (flags still from the cmp with 4). */
326 | blu 3f | |
327 | cmp %g3, 0x8 | |
328 | ||
/* len & 0xc == 8 -> label 2.  The delay slot sets %g3 = len / 4 for
 * both this case and the fall-through case.
 */
329 | be 2f | |
21f74d36 | 330 | srl %i2, 2, %g3
1da177e4 | 331 | |
21f74d36 DM |
/* len & 0xc == 0xc: preload two words and enter the loop at 8.  dst is
 * biased by -8 so that the store to [%i0 + 8] hits the current dst;
 * the counter is raised by 1 to match.
 */
332 | ld [%i1], %i3
333 | add %i0, -8, %i0 | |
334 | ld [%i1 + 4], %i4 | |
1da177e4 LT |
335 | b 8f
336 | add %g3, 1, %g3 | |
337 | 2: | |
21f74d36 DM |
/* len & 0xc == 8: enter at 9.  dst is biased by -12 for the store to
 * [%i0 + 12], src by -4 for the load from [%i1 + 12]; counter + 2.
 */
338 | ld [%i1], %i4
339 | add %i0, -12, %i0 | |
340 | ld [%i1 + 4], %i5 | |
1da177e4 LT |
341 | add %g3, 2, %g3
342 | b 9f | |
21f74d36 | 343 | add %i1, -4, %i1
1da177e4 | 344 | 3:
21f74d36 DM |
/* len & 0xc == 0: enter at 7.  dst is biased by -4 for the store to
 * [%i0 + 4], src advanced by 4 for the load from [%i1 + 4].
 */
345 | ld [%i1], %g1
346 | add %i0, -4, %i0 | |
347 | ld [%i1 + 4], %i3 | |
348 | srl %i2, 2, %g3 | |
1da177e4 | 349 | b 7f
21f74d36 | 350 | add %i1, 4, %i1
1da177e4 | 351 | 4:
21f74d36 DM |
/* len & 0xc == 4: with len <= 7 there is exactly one word to store, so
 * go straight to the final store at 10.  Otherwise load a third word,
 * take one off the counter and fall into the top of the loop.
 */
352 | ld [%i1], %i5
353 | cmp %i2, 7 | |
354 | ld [%i1 + 4], %g1 | |
355 | srl %i2, 2, %g3 | |
1da177e4 | 356 | bleu 10f
21f74d36 | 357 | add %i1, 8, %i1
1da177e4 | 358 | |
21f74d36 | 359 | ld [%i1], %i3
1da177e4 LT |
360 | add %g3, -1, %g3
361 | 5: | |
21f74d36 DM |
/* Loop body, four output words per pass.  Each step builds
 * (earlier word << %g4) | (later word >> %l0) and stores it, while the
 * next source word is loaded ahead.  The data rotates through
 * %i5, %g1, %i3, %i4.
 */
362 | sll %i5, %g4, %g2
363 | srl %g1, %l0, %g5 | |
1da177e4 | 364 | or %g2, %g5, %g2
21f74d36 | 365 | st %g2, [%i0]
1da177e4 | 366 | 7:
21f74d36 | 367 | ld [%i1 + 4], %i4
1da177e4 | 368 | sll %g1, %g4, %g2
21f74d36 | 369 | srl %i3, %l0, %g5
1da177e4 | 370 | or %g2, %g5, %g2
21f74d36 | 371 | st %g2, [%i0 + 4]
1da177e4 | 372 | 8:
21f74d36 DM |
373 | ld [%i1 + 8], %i5
374 | sll %i3, %g4, %g2 | |
375 | srl %i4, %l0, %g5 | |
1da177e4 | 376 | or %g2, %g5, %g2
21f74d36 | 377 | st %g2, [%i0 + 8]
1da177e4 | 378 | 9:
21f74d36 DM |
/* Last step of a pass: count four words down (addcc sets the flags for
 * the branch) and advance both pointers by 16.
 */
379 | ld [%i1 + 12], %g1
380 | sll %i4, %g4, %g2 | |
381 | srl %i5, %l0, %g5 | |
1da177e4 LT |
382 | addcc %g3, -4, %g3
383 | or %g2, %g5, %g2 | |
21f74d36 DM |
384 | add %i1, 16, %i1
385 | st %g2, [%i0 + 12] | |
386 | add %i0, 16, %i0 | |
/* Annulled branch: the load in the delay slot is executed only when the
 * branch is taken, i.e. when another pass follows.
 */
1da177e4 | 387 | bne,a 5b
21f74d36 | 388 | ld [%i1], %i3
1da177e4 | 389 | 10:
21f74d36 DM |
/* Build and store the last full word from %i5 and %g1.  %g3 becomes
 * %l0 / 8 = 4 - (src & 3); subtracting it moves %i1 from the word
 * being read ahead back to the next source byte not yet copied.
 */
390 | sll %i5, %g4, %g2
391 | srl %g1, %l0, %g5 | |
392 | srl %l0, 3, %g3 | |
1da177e4 | 393 | or %g2, %g5, %g2
21f74d36 DM |
394 | sub %i1, %g3, %i1
395 | andcc %i2, 2, %g0 | |
396 | st %g2, [%i0] | |
/* Bit 1 of len clear -> no 2-byte piece.  The delay slot tests bit 0
 * for the branch at the next label 1.
 */
1da177e4 | 397 | be 1f
21f74d36 DM |
398 | andcc %i2, 1, %g0
399 | ||
/* Copy two bytes behind the word just stored ([%i0 + 4] and + 5; %i0
 * is advanced by 2 in between, hence the "+ 3").
 */
400 | ldub [%i1], %g2 | |
401 | add %i1, 2, %i1 | |
402 | stb %g2, [%i0 + 4] | |
403 | add %i0, 2, %i0 | |
404 | ldub [%i1 - 1], %g2 | |
405 | stb %g2, [%i0 + 3] | |
1da177e4 LT |
406 | 1:
407 | be 1f | |
408 | nop | |
21f74d36 DM |
/* Bit 0 of len set: copy the final byte. */
409 | ldub [%i1], %g2
410 | stb %g2, [%i0 + 4] | |
1da177e4 | 411 | 1:
/* Return; the restore in the delay slot switches back to the caller's
 * window and writes the saved dst (%g7) into the caller's %o0.
 */
21f74d36 | 412 | ret
a52312b8 | 413 | restore %g7, %g0, %o0
1da177e4 | 414 | |
1da177e4 LT |
/* short_end: byte-wise copy of a short region with no alignment
 * requirement.  Entered from non_aligned (len <= 6) and from
 * short_aligned_end (len <= 15 with src not word aligned).
 * The even part of len (len & 0xe, at most 14) is copied by jumping
 * into the unrolled MOVE_SHORTCHUNK table; an odd last byte is copied
 * at label 89.  Returns the original dst from %g7.
 */
415 | 88: /* short_end */
416 | ||
417 | and %o2, 0xe, %o3 | |
/* No reference to label 20 is visible in this part of the file. */
418 | 20: | |
/* Computed jump: each table entry copies 2 bytes with 4 instructions
 * (16 bytes of code), so the target is 89 - 8 * %o3.  Both pointers are
 * advanced by %o3 first; the entries address backwards from them.  The
 * delay slot tests bit 0 of len for the branch at 89; the table itself
 * does not modify the condition codes.
 */
419 | sethi %hi(89f), %o5 | |
420 | sll %o3, 3, %o4 | |
421 | add %o0, %o3, %o0 | |
422 | sub %o5, %o4, %o5 | |
423 | add %o1, %o3, %o1 | |
424 | jmpl %o5 + %lo(89f), %g0 | |
425 | andcc %o2, 1, %g0 | |
426 | ||
427 | MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | |
428 | MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | |
429 | MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | |
430 | MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | |
431 | MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | |
432 | MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | |
433 | MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | |
434 | ||
435 | 89: /* short_table_end */ | |
436 | ||
/* len even -> done. */
437 | be 1f | |
438 | nop | |
439 | ||
/* len odd: copy the last byte. */
440 | ldub [%o1], %g2 | |
441 | stb %g2, [%o0] | |
442 | 1: | |
/* Return the original dst (restored in the retl delay slot). */
443 | retl | |
a52312b8 | 444 | mov %g7, %o0
1da177e4 LT |
445 | |
/* short_aligned_end: entered from memcpy's "bleu 90f", i.e. len <= 15
 * and (dst - src) & 3 == 0, with the condition codes still holding the
 * result of "andcc %o1, 3" from that branch's delay slot.
 * If src is not word aligned the byte-wise short_end path is used.
 * Otherwise an optional 8-byte piece is copied here and the remaining
 * bits 2..0 of len are finished by memcpy_last7 (label 81).
 */
446 | 90: /* short_aligned_end */
/* src & 3 != 0 -> short_end.  The delay slot tests bit 3 of len. */
447 | bne 88b | |
448 | andcc %o2, 8, %g0 | |
449 | ||
/* Bit 3 clear -> nothing to copy here.  The delay slot tests bit 2 of
 * len; that result is what the first branch at label 81 consumes (the
 * ld/st/add instructions below do not modify the condition codes).
 */
450 | be 1f | |
451 | andcc %o2, 4, %g0 | |
452 | ||
/* Copy 8 bytes as two words. */
453 | ld [%o1 + 0x00], %g2 | |
454 | ld [%o1 + 0x04], %g3 | |
455 | add %o1, 8, %o1 | |
456 | st %g2, [%o0 + 0x00] | |
457 | st %g3, [%o0 + 0x04] | |
458 | add %o0, 8, %o0 | |
459 | 1: | |
/* Join memcpy_last7, which expects the length in %g1 (set in the delay
 * slot).
 */
460 | b 81b | |
461 | mov %o2, %g1 |