2 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
3 * xthal_memcpy and xthal_bcopy
5 * This file is subject to the terms and conditions of the GNU General Public
6 * License. See the file "COPYING" in the main directory of this archive
9 * Copyright (C) 2002 - 2012 Tensilica Inc.
12 #include <variant/core.h>
# src_b: emit a funnel-shift of the two source words w0/w1 into r,
# used by the unaligned-source copy paths (shift amount was set
# earlier with ssa8).  NOTE(review): the macro body is elided in this
# excerpt — confirm exact semantics against the full file.
14 .macro src_b r, w0, w1
31 * void *memcpy(void *dst, const void *src, size_t len);
33 * This function is intended to do the same thing as the standard
34 * library function memcpy() for most cases.
35 * However, where the source and/or destination references
36 * an instruction RAM or ROM or a data RAM or ROM, that
37 * source and/or destination will always be accessed with
38 * 32-bit load and store instructions (as required for these
42 * !!!!!!! Handling of IRAM/IROM has not yet
43 * !!!!!!! been implemented.
45 * The (general case) algorithm is as follows:
46 * If destination is unaligned, align it by conditionally
47 * copying 1 and 2 bytes.
48 * If source is aligned,
49 * do 16 bytes with a loop, and then finish up with
50 * 8, 4, 2, and 1 byte copies conditional on the length;
51 * else (if source is unaligned),
52 * do the same, but use SRC to align the source data.
53 * This code tries to use fall-through branches for the common
54 * case of aligned source and destination and multiple
# Byte-by-byte copy loop for short/odd lengths.
# Registers here: a3 = src cursor, a4 = byte count (a5 = dst per the
# memcpy entry code).  When the core has zero-overhead loops
# (XCHAL_HAVE_LOOPS) the loopnez form is used; otherwise an explicit
# end-pointer compare terminates the loop.
# NOTE(review): the load/store loop body is elided in this excerpt.
78 .byte 0 # 1 mod 4 alignment for LOOPNEZ
79 # (0 mod 4 alignment for LBEG)
82 loopnez a4, .Lbytecopydone
83 #else /* !XCHAL_HAVE_LOOPS */
# No hardware loop: skip entirely for zero length, then compute the
# one-past-the-end source address used as the loop bound below.
84 beqz a4, .Lbytecopydone
85 add a7, a3, a4 # a7 = end address for source
86 #endif /* !XCHAL_HAVE_LOOPS */
93 bne a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
94 #endif /* !XCHAL_HAVE_LOOPS */
99 * Destination is unaligned
# Alignment fixup: copy 1 byte (dst 1 mod 2) and/or 2 bytes
# (dst 2 mod 4) so the destination becomes word-aligned, then rejoin
# the main algorithm at .Ldstaligned.  Short copies (len < 7 resp. 6)
# are not worth aligning and go straight to the byte loop.
# NOTE(review): the actual byte/halfword load-store instructions are
# elided in this excerpt.
103 .Ldst1mod2: # dst is only byte aligned
104 _bltui a4, 7, .Lbytecopy # do short copies byte by byte
112 _bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
113 # return to main algorithm
114 .Ldst2mod4: # dst 16-bit aligned
116 _bltui a4, 6, .Lbytecopy # do short copies byte by byte
124 j .Ldstaligned # dst is now aligned, return to main algorithm
# void *memcpy(void *dst /*a2*/, const void *src /*a3*/, size_t len /*a4*/)
# Xtensa windowed ABI: `entry sp, 16` allocates a minimal register
# window frame.  a2 is preserved as the return value; a5 is the
# working destination pointer.  Dispatches to the dst-alignment fixups
# above, then (fall-through common case) runs a 16-bytes-per-iteration
# word copy when the source is also word-aligned.
128 .type memcpy,@function
131 entry sp, 16 # minimal stack frame
132 # a2/ dst, a3/ src, a4/ len
133 mov a5, a2 # copy dst so that a2 is return value
135 _bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
136 _bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
137 .Ldstaligned: # return here from .Ldst?mod? once dst is aligned
138 srli a7, a4, 4 # number of loop iterations with 16B
# a8 = 0b11 mask; any set low bit in src means a shifting copy is needed.
140 movi a8, 3 # if source is not aligned,
141 _bany a3, a8, .Lsrcunaligned # then use shifting copy
143 * Destination and source are word-aligned, use word copy.
145 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
147 loopnez a7, .Loop1done
148 #else /* !XCHAL_HAVE_LOOPS */
151 add a8, a8, a3 # a8 = end of last 16B source chunk
152 #endif /* !XCHAL_HAVE_LOOPS */
# NOTE(review): the 4x l32i/s32i loop body and the 8/4/2/1-byte tail
# are elided in this excerpt.
164 #if !XCHAL_HAVE_LOOPS
165 bne a3, a8, .Loop1 # continue loop if a3:src != a8:src_end
166 #endif /* !XCHAL_HAVE_LOOPS */
205 * Destination is aligned, Source is unaligned
# Shifting copy: ssa8 loads the shift-amount register from the low
# bits of src, so SRC (funnel shift) can merge two aligned loads into
# one correctly shifted store word.  On cores that fault on unaligned
# loads (or under the ISS alignment checkers) a3 is rounded down to a
# word boundary first and the offset kept in a11 for restoration below.
210 _beqz a4, .Ldone # avoid loading anything for zero-length copies
211 # copy 16 bytes per iteration for word-aligned dst and unaligned src
212 ssa8 a3 # set shift amount from byte offset
213 #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS (simulator) with the
214 lint or ferret client, or 0 to save a few cycles */
215 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
216 and a11, a3, a8 # save unalignment offset for below
217 sub a3, a3, a11 # align a3
219 l32i a6, a3, 0 # load first word
221 loopnez a7, .Loop2done
222 #else /* !XCHAL_HAVE_LOOPS */
225 add a10, a10, a3 # a10 = end of last 16B source chunk
226 #endif /* !XCHAL_HAVE_LOOPS */
# NOTE(review): the load/src_b/store pipeline of the loop body is
# elided in this excerpt.
242 #if !XCHAL_HAVE_LOOPS
243 bne a3, a10, .Loop2 # continue loop if a3:src != a10:src_end
244 #endif /* !XCHAL_HAVE_LOOPS */
# Undo the earlier round-down so tail copies read from the true address.
267 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
268 add a3, a3, a11 # readjust a3 with correct misalignment
291 * void bcopy(const void *src, void *dest, size_t n);
# bcopy has src/dst in the opposite order from memmove; the (elided)
# entry code swaps the argument registers and falls into the shared
# memmove path at .Lmovecommon.
295 .type bcopy,@function
297 entry sp, 16 # minimal stack frame
298 # a2=src, a3=dst, a4=len
302 j .Lmovecommon # go to common code for memmove+bcopy
305 * void *memmove(void *dst, const void *src, size_t len);
307 * This function is intended to do the same thing as the standard
308 * library function memmove() for most cases.
309 * However, where the source and/or destination references
310 * an instruction RAM or ROM or a data RAM or ROM, that
311 * source and/or destination will always be accessed with
312 * 32-bit load and store instructions (as required for these
316 * !!!!!!! Handling of IRAM/IROM has not yet
317 * !!!!!!! been implemented.
319 * The (general case) algorithm is as follows:
320 * If end of source doesn't overlap destination then use memcpy.
321 * Otherwise do memcpy backwards.
# Backward (high-to-low address) byte copy and destination alignment
# fixups for the overlapping-memmove path.  Mirrors the forward
# .Lbytecopy/.Ldst?mod? code above, but the loop bound is the START of
# the source region since pointers are decremented.
# NOTE(review): loop bodies and the byte/halfword stores are elided in
# this excerpt.
342 .byte 0 # 1 mod 4 alignment for LOOPNEZ
343 # (0 mod 4 alignment for LBEG)
346 loopnez a4, .Lbackbytecopydone
347 #else /* !XCHAL_HAVE_LOOPS */
348 beqz a4, .Lbackbytecopydone
349 sub a7, a3, a4 # a7 = start address for source
350 #endif /* !XCHAL_HAVE_LOOPS */
356 #if !XCHAL_HAVE_LOOPS
357 bne a3, a7, .Lbacknextbyte # continue loop if
358 # $a3:src != $a7:src_start
359 #endif /* !XCHAL_HAVE_LOOPS */
364 * Destination is unaligned
368 .Lbackdst1mod2: # dst is only byte aligned
369 _bltui a4, 7, .Lbackbytecopy # do short copies byte by byte
377 _bbci.l a5, 1, .Lbackdstaligned # if dst is now aligned, then
378 # return to main algorithm
379 .Lbackdst2mod4: # dst 16-bit aligned
381 _bltui a4, 6, .Lbackbytecopy # do short copies byte by byte
389 j .Lbackdstaligned # dst is now aligned,
390 # return to main algorithm
# void *memmove(void *dst /*a2*/, const void *src /*a3*/, size_t len /*a4*/)
# If the regions don't overlap in the dangerous direction the code
# branches to the forward memcpy path (.Lcommon); otherwise it copies
# backwards, mirroring the forward algorithm with decremented pointers
# and start-of-chunk loop bounds.
394 .type memmove,@function
397 entry sp, 16 # minimal stack frame
398 # a2/ dst, a3/ src, a4/ len
399 mov a5, a2 # copy dst so that a2 is return value
# a6 here presumably holds dst - src (computed on an elided line);
# if that distance >= len the regions are safe for a forward copy.
# TODO(review): confirm against the full file.
402 bgeu a6, a4, .Lcommon
407 _bbsi.l a5, 0, .Lbackdst1mod2 # if dst is 1 mod 2
408 _bbsi.l a5, 1, .Lbackdst2mod4 # if dst is 2 mod 4
409 .Lbackdstaligned: # return here from .Lbackdst?mod? once dst is aligned
410 srli a7, a4, 4 # number of loop iterations with 16B
412 movi a8, 3 # if source is not aligned,
413 _bany a3, a8, .Lbacksrcunaligned # then use shifting copy
415 * Destination and source are word-aligned, use word copy.
417 # copy 16 bytes per iteration for word-aligned dst and word-aligned src
419 loopnez a7, .backLoop1done
420 #else /* !XCHAL_HAVE_LOOPS */
421 beqz a7, .backLoop1done
423 sub a8, a3, a8 # a8 = start of first 16B source chunk
424 #endif /* !XCHAL_HAVE_LOOPS */
436 #if !XCHAL_HAVE_LOOPS
437 bne a3, a8, .backLoop1 # continue loop if a3:src != a8:src_start
438 #endif /* !XCHAL_HAVE_LOOPS */
# Tail: handle the remaining 8/4/2/1 bytes by testing bits of len (a4).
# The copy instructions between these tests are elided in this excerpt.
440 bbci.l a4, 3, .Lback2
449 bbsi.l a4, 2, .Lback3
450 bbsi.l a4, 1, .Lback4
451 bbsi.l a4, 0, .Lback5
459 bbsi.l a4, 1, .Lback4
460 bbsi.l a4, 0, .Lback5
468 bbsi.l a4, 0, .Lback5
479 * Destination is aligned, Source is unaligned
# Backward shifting copy, mirror of .Lsrcunaligned above: ssa8 sets
# the funnel-shift amount from the source's byte offset; on cores with
# unaligned-load exceptions (or under ISS alignment checkers) a3 is
# word-aligned first with the offset saved in a11 and restored below.
# Loop bound a10 is the START of the first 16B chunk (copy runs
# high-to-low).  NOTE(review): loop body instructions are elided in
# this excerpt.
484 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies
485 # copy 16 bytes per iteration for word-aligned dst and unaligned src
486 ssa8 a3 # set shift amount from byte offset
487 #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with
488 * the lint or ferret client, or 0
489 * to save a few cycles */
490 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
491 and a11, a3, a8 # save unalignment offset for below
492 sub a3, a3, a11 # align a3
494 l32i a6, a3, 0 # load first word
496 loopnez a7, .backLoop2done
497 #else /* !XCHAL_HAVE_LOOPS */
498 beqz a7, .backLoop2done
500 sub a10, a3, a10 # a10 = start of first 16B source chunk
501 #endif /* !XCHAL_HAVE_LOOPS */
517 #if !XCHAL_HAVE_LOOPS
518 bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
519 #endif /* !XCHAL_HAVE_LOOPS */
# Tail handling by length bits, as in the aligned backward path.
521 bbci.l a4, 3, .Lback12
533 bbci.l a4, 2, .Lback13
# Undo the earlier round-down so tail copies read the true addresses.
542 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
543 add a3, a3, a11 # readjust a3 with correct misalignment
545 bbsi.l a4, 1, .Lback14
546 bbsi.l a4, 0, .Lback15
557 bbsi.l a4, 0, .Lback15
571 * comment-start: "# "
572 * comment-start-skip: "# *"