Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/memset.S | |
3 | * | |
4 | * Copyright (C) 1995-2000 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | * ASM optimised string functions | |
11 | */ | |
12 | #include <linux/linkage.h> | |
13 | #include <asm/assembler.h> | |
14 | ||
15 | .text | |
16 | .align 5 | |
1da177e4 LT |
17 | |
/*
 * void *memset(void *s, int c, size_t n)
 *
 * Store the low byte of c into n consecutive bytes starting at s and
 * return s.
 *
 * In:   r0 = s   destination pointer, any alignment
 *       r1 = c   fill value; only bits 7:0 are used
 *       r2 = n   number of bytes to fill
 * Out:  r0 = s   (r0 is never written; ip is the running store pointer)
 * Uses: r1, r2, r3, ip and the flags.  The bulk loops additionally use
 *       r8 and lr (or r4-r8 and lr in the cache-line variant); those are
 *       pushed on the stack first and popped again before returning.
 *
 * Outline:
 *   6:    destination not word aligned - store 1-3 bytes to align it
 *   1:    replicate the fill byte into all four bytes of r1 and r3
 *   2:/3: bulk loop, 64 bytes per iteration, then 32 and 16 byte steps
 *   4:    8 and 4 byte steps
 *   5:    final 0-3 bytes, stored one byte at a time
 *
 * NOTE(review): the count is tested with signed conditions (blt/bge/bgt),
 * so a count with bit 31 set is treated as negative and skips the bulk
 * loops - confirm that callers never pass such sizes.
 */
ENTRY(memset)
	and	r1, r1, #255		@ c is converted to unsigned char;
					@ stray upper bits would corrupt
					@ the word pattern built at 1:
	ands	r3, r0, #3		@ 1 unaligned?  r3 = s & 3
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ r1 = fill byte in bits 15:0
	orr	r1, r1, r1, lsl #16	@ r1 = fill byte in all four bytes
	mov	r3, r1			@ second pattern register
	cmp	r2, #16
	blt	4f			@ fewer than 16 bytes: skip bulk code

/*
 * NOTE(review): CALGN() is presumably provided by <asm/assembler.h>.
 * If CALGN(1) expands to nothing the test below reads "! +0" (true) and
 * the simple loop is assembled; if it expands to its argument the test
 * reads "! 1+0" (false) and the cache-line aligning version is used.
 */
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b			@ flags are still those of the subs
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 2 x 16 bytes
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}		@ restore; lr is needed by 5: below

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31			@ only if more than 96 bytes remain
	ble	3f			@ small fill or already 32-byte aligned

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to next 32-byte boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 of r8, N = bit 3 of r8
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ bit 2 of the original distance
	mov	r8, r1			@ r8 is a pattern register again
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 bytes per iteration
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b			@ flags are still those of the subs
	ldmeqfd	sp!, {r4-r8, pc}	@ count was a multiple of 64: return

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}	@ restore; lr is needed by 5: below

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1		@ 2 bytes, one at a time
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1		@ last odd byte
	mov	pc, lr			@ return, r0 still holds s

/*
 * Unaligned destination: r3 = s & 3 (1, 2 or 3).  Store 4 - r3 bytes so
 * that ip becomes word aligned, then rejoin the aligned code at 1:.
 * With fewer than 4 bytes in total, go straight to the byte tail; the
 * low two bits of r2 are unchanged by the subtraction of 4.
 */
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1 r3 == 1 only
	strleb	r1, [ip], #1		@ 1 r3 == 1 or 2
	strb	r1, [ip], #1		@ 1 always
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)