Merge remote-tracking branch 'mfd/for-mfd-next'
[deliverable/linux.git] / arch / arm / lib / csumpartialcopygeneric.S
CommitLineData
1da177e4
LT
1/*
2 * linux/arch/arm/lib/csumpartialcopygeneric.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
6ebbf2ce 10#include <asm/assembler.h>
4dd1837d 11#include <asm/export.h>
1da177e4
LT
12
13/*
14 * unsigned int
15 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...)
16 * r0 = src, r1 = dst, r2 = len, r3 = sum
17 * Returns : r0 = checksum
18 *
19 * Note that 'tst' and 'teq' preserve the carry flag.
20 */
21
22src .req r0
23dst .req r1
24len .req r2
25sum .req r3
26
/* Zero-length exit: the incoming sum is handed back in r0 unchanged. */
8adbb371 27.Lzero: mov r0, sum
/* NOTE(review): load_regs is defined outside this file; presumably it undoes save_regs and returns to the caller -- confirm. */
90303b10 28 load_regs
1da177e4
LT
29
30 /*
31 * Align an unaligned destination pointer. We know that
32 * we have >= 8 bytes here, so we don't need to check
33 * the length. Note that the source pointer hasn't been
34 * aligned yet.
35 */
8adbb371
NP
/*
 * Subroutine reached with a branch-and-link from the entry code; it leaves
 * through lr. Copies 1, 2 or 3 bytes so that dst ends up 32-bit aligned,
 * adding each byte to sum with adcs so the carry is chained from one
 * addition to the next. The sub/strb/tst instructions in between leave the
 * carry alone.
 * NOTE(review): load1b, load2b, put_byte_0 and put_byte_1 are defined
 * outside this file; presumably the loads fetch bytes from src and the
 * put_byte_N operands place a byte in lane N of the sum -- confirm.
 */
36.Ldst_unaligned:
37 tst dst, #1
/* Bit 0 of dst clear: dst is already 16-bit aligned, only a byte pair is needed. */
38 beq .Ldst_16bit
1da177e4
LT
39
40 load1b ip
41 sub len, len, #1
42 adcs sum, sum, ip, put_byte_1 @ update checksum
43 strb ip, [dst], #1
44 tst dst, #2
/* Return early when the single byte was enough to reach a 4-byte boundary. */
6ebbf2ce 45 reteq lr @ dst is now 32bit aligned
1da177e4 46
/* Copy one byte pair to move dst from a 2-byte to a 4-byte boundary. */
8adbb371 47.Ldst_16bit: load2b r8, ip
1da177e4
LT
48 sub len, len, #2
49 adcs sum, sum, r8, put_byte_0
50 strb r8, [dst], #1
51 adcs sum, sum, ip, put_byte_1
52 strb ip, [dst], #1
6ebbf2ce 53 ret lr @ dst is now 32bit aligned
1da177e4
LT
54
55 /*
56 * Handle 0 to 7 bytes, with any alignment of source and
57 * destination pointers. Note that when we get here, C = 0
58 */
8adbb371
NP
/*
 * Short-copy path, entered from the entry code when len is below 8.
 * Works a byte (or byte pair) at a time: an optional leading byte to make
 * dst 16-bit aligned, then byte pairs while bits 1-2 of len are set, then
 * an optional trailing byte. Every exit goes to .Lzero or .Ldone.
 * NOTE(review): load1b, load2b, put_byte_0 and put_byte_1 are defined
 * outside this file; presumably the loads fetch bytes from src -- confirm.
 */
59.Lless8: teq len, #0 @ check for zero count
60 beq .Lzero
1da177e4
LT
61
62 /* we must have at least one byte. */
63 tst dst, #1 @ dst 16-bit aligned
8adbb371 64 beq .Lless8_aligned
1da177e4
LT
65
66 /* Align dst */
67 load1b ip
68 sub len, len, #1
69 adcs sum, sum, ip, put_byte_1 @ update checksum
70 strb ip, [dst], #1
/* Any of bits 1-2 set in the remaining len means at least one byte pair is left. */
71 tst len, #6
8adbb371 72 beq .Lless8_byteonly
1da177e4
LT
73
/* Pair loop: copy two bytes and add both to the sum. */
741: load2b r8, ip
75 sub len, len, #2
76 adcs sum, sum, r8, put_byte_0
77 strb r8, [dst], #1
78 adcs sum, sum, ip, put_byte_1
79 strb ip, [dst], #1
8adbb371
NP
80.Lless8_aligned:
81 tst len, #6
1da177e4 82 bne 1b
8adbb371 83.Lless8_byteonly:
/* Bit 0 of len set: one last byte remains. */
1da177e4 84 tst len, #1
8adbb371 85 beq .Ldone
1da177e4
LT
86 load1b r8
87 adcs sum, sum, r8, put_byte_0 @ update checksum
88 strb r8, [dst], #1
8adbb371 89 b .Ldone
1da177e4
LT
90
/*
 * Routine entry. On entry r0 = src, r1 = dst, r2 = len, r3 = sum (see the
 * register aliases declared near the top of the file).
 * NOTE(review): FN_ENTRY and save_regs are defined outside this file;
 * presumably FN_ENTRY emits the entry label and save_regs pushes the
 * registers that load_regs later restores -- confirm.
 */
91FN_ENTRY
1da177e4 92 save_regs
1da177e4
LT
93
94 cmp len, #8 @ Ensure that we have at least
/* Unsigned "lower": fewer than 8 bytes are handled by the byte-wise path. */
8adbb371 95 blo .Lless8 @ 8 bytes to copy.
1da177e4
LT
96
/* Adding zero leaves sum unchanged and clears the carry for the adcs chain that follows. */
97 adds sum, sum, #0 @ C = 0
98 tst dst, #3 @ Test destination alignment
/* Branch-and-link: .Ldst_unaligned returns through lr to the instruction after this one. */
8adbb371 99 blne .Ldst_unaligned @ align destination, return here
1da177e4
LT
100
101 /*
102 * Ok, the dst pointer is now 32bit aligned, and we know
103 * that we must have more than 4 bytes to copy. Note
104 * that C contains the carry from the dst alignment above.
105 */
106
/*
 * Main copy path for a 32-bit aligned dst. If src is not word aligned the
 * work is handed to .Lsrc_not_aligned; otherwise whole words are copied in
 * groups of 16, 8 and 4 bytes, then the last 0-3 bytes are handled.
 * Each copied word is added to sum with adcs so the carry is chained.
 * NOTE(review): load1l, load2l, load4l, get_byte_N, put_byte_0 and lspush
 * are defined outside this file; presumably loadNl fetches N words from
 * src, get_byte_N extracts byte N of a word and lspush is an
 * endian-dependent shift -- confirm.
 */
107 tst src, #3 @ Test source alignment
8adbb371 108 bne .Lsrc_not_aligned
1da177e4
LT
109
110 /* Routine for src & dst aligned */
111
/* ip = len with the low four bits cleared, i.e. the byte count handled by the 16-byte loop. */
112 bics ip, len, #15
113 beq 2f
114
1151: load4l r4, r5, r6, r7
116 stmia dst!, {r4, r5, r6, r7}
117 adcs sum, sum, r4
118 adcs sum, sum, r5
119 adcs sum, sum, r6
120 adcs sum, sum, r7
121 sub ip, ip, #16
/* teq is used for the loop test because it preserves the carry (see the note in the file header). */
122 teq ip, #0
123 bne 1b
124
/* Bits 2-3 of len select an optional 8-byte and an optional 4-byte step. */
1252: ands ip, len, #12
126 beq 4f
127 tst ip, #8
128 beq 3f
129 load2l r4, r5
130 stmia dst!, {r4, r5}
131 adcs sum, sum, r4
132 adcs sum, sum, r5
133 tst ip, #4
134 beq 4f
135
1363: load1l r4
137 str r4, [dst], #4
138 adcs sum, sum, r4
139
/* Trailing bytes: len is reduced to its low two bits (0-3 bytes left). */
1404: ands len, len, #3
8adbb371 141 beq .Ldone
1da177e4
LT
142 load1l r4
143 tst len, #2
/* mov does not touch the flags, so the beq below still acts on the tst result. */
144 mov r5, r4, get_byte_0
8adbb371 145 beq .Lexit
d98b90ea 146 adcs sum, sum, r4, lspush #16
1da177e4
LT
147 strb r5, [dst], #1
148 mov r5, r4, get_byte_1
149 strb r5, [dst], #1
150 mov r5, r4, get_byte_2
/*
 * Shared tail, also branched to from the unaligned-source paths: r5 holds
 * the candidate last byte. When bit 0 of len is set it is stored, masked
 * to 8 bits and added to the sum; otherwise all three conditional
 * instructions are skipped.
 */
8adbb371 151.Lexit: tst len, #1
1da177e4
LT
152 strneb r5, [dst], #1
153 andne r5, r5, #255
154 adcnes sum, sum, r5, put_byte_0
155
156 /*
157 * If the dst pointer was not 16-bit aligned, we
158 * need to rotate the checksum here to get around
159 * the inefficient byte manipulations in the
160 * architecture independent code.
161 */
/* Common exit: fold the pending carry into the result placed in r0. */
8adbb371 162.Ldone: adc r0, sum, #0
1da177e4
LT
/* NOTE(review): reads the word at the top of the stack, which the inline comment labels as dst; this relies on the layout pushed by save_regs (defined outside this file) -- confirm. */
163 ldr sum, [sp, #0] @ dst
164 tst sum, #1
/* Rotate the result by one byte when bit 0 of the loaded value is set. */
165 movne r0, r0, ror #8
90303b10 166 load_regs
1da177e4 167
/*
 * Source is not word aligned (dst already is). src is rounded down to a
 * word boundary and the first word is loaded; the byte offset in ip then
 * selects the variant: 2 goes to .Lsrc2_aligned, 3 goes to .Lsrc3_aligned
 * and the remaining case (offset 1) falls through here.
 * In every loop r4 carries the not-yet-stored part of the previous source
 * word; each output word is built by OR-ing it with the shifted next word.
 * NOTE(review): load1l/load2l/load4l, lspull, lspush and get_byte_N are
 * defined outside this file; presumably lspull/lspush are endian-dependent
 * logical shifts in opposite directions -- confirm.
 */
8adbb371 168.Lsrc_not_aligned:
1da177e4
LT
169 adc sum, sum, #0 @ include C from dst alignment
170 and ip, src, #3
171 bic src, src, #3
172 load1l r5
173 cmp ip, #2
8adbb371
NP
174 beq .Lsrc2_aligned
175 bhi .Lsrc3_aligned
/* Falling through means ip was below 2, so the cmp above left the carry clear; mov does not change it. */
d98b90ea 176 mov r4, r5, lspull #8 @ C = 0
1da177e4
LT
/* ip = len with the low four bits cleared: bytes handled by the 16-byte loop. */
177 bics ip, len, #15
178 beq 2f
1791: load4l r5, r6, r7, r8
d98b90ea
VK
180 orr r4, r4, r5, lspush #24
181 mov r5, r5, lspull #8
182 orr r5, r5, r6, lspush #24
183 mov r6, r6, lspull #8
184 orr r6, r6, r7, lspush #24
185 mov r7, r7, lspull #8
186 orr r7, r7, r8, lspush #24
1da177e4
LT
187 stmia dst!, {r4, r5, r6, r7}
188 adcs sum, sum, r4
189 adcs sum, sum, r5
190 adcs sum, sum, r6
191 adcs sum, sum, r7
/* Keep the unused part of the last loaded word for the next output word. */
d98b90ea 192 mov r4, r8, lspull #8
1da177e4
LT
193 sub ip, ip, #16
194 teq ip, #0
195 bne 1b
/* Bits 2-3 of len select an optional 8-byte and an optional 4-byte step. */
1962: ands ip, len, #12
197 beq 4f
198 tst ip, #8
199 beq 3f
200 load2l r5, r6
d98b90ea
VK
201 orr r4, r4, r5, lspush #24
202 mov r5, r5, lspull #8
203 orr r5, r5, r6, lspush #24
1da177e4
LT
204 stmia dst!, {r4, r5}
205 adcs sum, sum, r4
206 adcs sum, sum, r5
d98b90ea 207 mov r4, r6, lspull #8
1da177e4
LT
208 tst ip, #4
209 beq 4f
2103: load1l r5
d98b90ea 211 orr r4, r4, r5, lspush #24
1da177e4
LT
212 str r4, [dst], #4
213 adcs sum, sum, r4
d98b90ea 214 mov r4, r5, lspull #8
/* Trailing 0-3 bytes are all taken from the leftover held in r4; no further load is issued on this path. */
1da177e4 2154: ands len, len, #3
8adbb371 216 beq .Ldone
1da177e4
LT
217 mov r5, r4, get_byte_0
218 tst len, #2
8adbb371 219 beq .Lexit
d98b90ea 220 adcs sum, sum, r4, lspush #16
1da177e4
LT
221 strb r5, [dst], #1
222 mov r5, r4, get_byte_1
223 strb r5, [dst], #1
/* r5 = third leftover byte; .Lexit stores and sums it only when bit 0 of len is set. */
224 mov r5, r4, get_byte_2
8adbb371 225 b .Lexit
1da177e4 226
/*
 * Variant for a source byte offset of 2 (reached from .Lsrc_not_aligned
 * with the first aligned word in r5). Same structure as the offset-1 loop,
 * with 16-bit shifts: r4 carries the leftover of the previous word.
 * NOTE(review): load1b/load1l/load2l/load4l, lspull, lspush and get_byte_N
 * are defined outside this file -- confirm their exact semantics there.
 */
d98b90ea 227.Lsrc2_aligned: mov r4, r5, lspull #16
1da177e4
LT
/* Adding zero leaves sum unchanged and clears the carry before the adcs chain. */
228 adds sum, sum, #0
229 bics ip, len, #15
230 beq 2f
2311: load4l r5, r6, r7, r8
d98b90ea
VK
232 orr r4, r4, r5, lspush #16
233 mov r5, r5, lspull #16
234 orr r5, r5, r6, lspush #16
235 mov r6, r6, lspull #16
236 orr r6, r6, r7, lspush #16
237 mov r7, r7, lspull #16
238 orr r7, r7, r8, lspush #16
1da177e4
LT
239 stmia dst!, {r4, r5, r6, r7}
240 adcs sum, sum, r4
241 adcs sum, sum, r5
242 adcs sum, sum, r6
243 adcs sum, sum, r7
d98b90ea 244 mov r4, r8, lspull #16
1da177e4
LT
245 sub ip, ip, #16
246 teq ip, #0
247 bne 1b
/* Bits 2-3 of len select an optional 8-byte and an optional 4-byte step. */
2482: ands ip, len, #12
249 beq 4f
250 tst ip, #8
251 beq 3f
252 load2l r5, r6
d98b90ea
VK
253 orr r4, r4, r5, lspush #16
254 mov r5, r5, lspull #16
255 orr r5, r5, r6, lspush #16
1da177e4
LT
256 stmia dst!, {r4, r5}
257 adcs sum, sum, r4
258 adcs sum, sum, r5
d98b90ea 259 mov r4, r6, lspull #16
1da177e4
LT
260 tst ip, #4
261 beq 4f
2623: load1l r5
d98b90ea 263 orr r4, r4, r5, lspush #16
1da177e4
LT
264 str r4, [dst], #4
265 adcs sum, sum, r4
d98b90ea 266 mov r4, r5, lspull #16
/* Trailing 0-3 bytes. */
1da177e4 2674: ands len, len, #3
8adbb371 268 beq .Ldone
1da177e4
LT
269 mov r5, r4, get_byte_0
270 tst len, #2
8adbb371 271 beq .Lexit
1da177e4
LT
/* Two or three bytes left: the leftover in r4 is added unshifted and its bytes 0 and 1 are stored. */
272 adcs sum, sum, r4
273 strb r5, [dst], #1
274 mov r5, r4, get_byte_1
275 strb r5, [dst], #1
276 tst len, #1
8adbb371 277 beq .Ldone
/* A third byte is needed, so it is fetched with load1b and passed to .Lexit in r5. */
1da177e4 278 load1b r5
8adbb371 279 b .Lexit
1da177e4 280
/*
 * Variant for a source byte offset of 3 (reached from .Lsrc_not_aligned
 * with the first aligned word in r5). Same structure as the other
 * unaligned loops, with a 24-bit/8-bit shift pair: r4 carries the leftover
 * of the previous word.
 * NOTE(review): load1l/load2l/load4l, lspull, lspush and get_byte_N are
 * defined outside this file -- confirm their exact semantics there.
 */
d98b90ea 281.Lsrc3_aligned: mov r4, r5, lspull #24
1da177e4
LT
/* Adding zero leaves sum unchanged and clears the carry before the adcs chain. */
282 adds sum, sum, #0
283 bics ip, len, #15
284 beq 2f
2851: load4l r5, r6, r7, r8
d98b90ea
VK
286 orr r4, r4, r5, lspush #8
287 mov r5, r5, lspull #24
288 orr r5, r5, r6, lspush #8
289 mov r6, r6, lspull #24
290 orr r6, r6, r7, lspush #8
291 mov r7, r7, lspull #24
292 orr r7, r7, r8, lspush #8
1da177e4
LT
293 stmia dst!, {r4, r5, r6, r7}
294 adcs sum, sum, r4
295 adcs sum, sum, r5
296 adcs sum, sum, r6
297 adcs sum, sum, r7
d98b90ea 298 mov r4, r8, lspull #24
1da177e4
LT
299 sub ip, ip, #16
300 teq ip, #0
301 bne 1b
/* Bits 2-3 of len select an optional 8-byte and an optional 4-byte step. */
3022: ands ip, len, #12
303 beq 4f
304 tst ip, #8
305 beq 3f
306 load2l r5, r6
d98b90ea
VK
307 orr r4, r4, r5, lspush #8
308 mov r5, r5, lspull #24
309 orr r5, r5, r6, lspush #8
1da177e4
LT
310 stmia dst!, {r4, r5}
311 adcs sum, sum, r4
312 adcs sum, sum, r5
d98b90ea 313 mov r4, r6, lspull #24
1da177e4
LT
314 tst ip, #4
315 beq 4f
3163: load1l r5
d98b90ea 317 orr r4, r4, r5, lspush #8
1da177e4
LT
318 str r4, [dst], #4
319 adcs sum, sum, r4
d98b90ea 320 mov r4, r5, lspull #24
/* Trailing 0-3 bytes. */
1da177e4 3214: ands len, len, #3
8adbb371 322 beq .Ldone
1da177e4
LT
323 mov r5, r4, get_byte_0
324 tst len, #2
8adbb371 325 beq .Lexit
1da177e4
LT
/* Two or three bytes left: store and sum the leftover byte from r4, then load another word for the rest. */
326 strb r5, [dst], #1
327 adcs sum, sum, r4
328 load1l r4
329 mov r5, r4, get_byte_0
330 strb r5, [dst], #1
d98b90ea 331 adcs sum, sum, r4, lspush #24
/* r5 = byte 1 of the new word; .Lexit stores and sums it only when bit 0 of len is set. */
1da177e4 332 mov r5, r4, get_byte_1
8adbb371 333 b .Lexit
/* NOTE(review): FN_EXIT and FN_EXPORT are macros defined outside this file; presumably they close and export the routine opened by FN_ENTRY -- confirm. */
93ed3970 334FN_EXIT
4dd1837d 335FN_EXPORT
This page took 1.019909 seconds and 5 git commands to generate.