/*
 * linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 * Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <asm/assembler.h>
#include <asm/export.h>

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

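/*
 * How the checksum is accumulated: every copied word (or byte, placed
 * with the put_byte_* shifts) is added into 'sum' with adcs, so the
 * carry out of one addition is folded into the next (end-around
 * carry); .Ldone finishes with 'adc r0, sum, #0' to fold in the last
 * carry.  Roughly, for each word w copied:
 *
 *     sum += w;  if the addition carried out, sum += 1;
 */
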
src     .req    r0
dst     .req    r1
len     .req    r2
sum     .req    r3

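/* Nothing (left) to copy: return the checksum accumulated so far. */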
.Lzero:         mov     r0, sum
                load_regs

                /*
                 * Align an unaligned destination pointer. We know that
                 * we have >= 8 bytes here, so we don't need to check
                 * the length. Note that the source pointer hasn't been
                 * aligned yet.
                 */
.Ldst_unaligned:
                tst     dst, #1
                beq     .Ldst_16bit

                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     dst, #2
                reteq   lr                              @ dst is now 32bit aligned

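                /*
                 * dst is now 16-bit aligned; copy one more halfword,
                 * byte by byte, to reach 32-bit alignment.
                 */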
.Ldst_16bit:    load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
                ret     lr                              @ dst is now 32bit aligned

                /*
                 * Handle 0 to 7 bytes, with any alignment of source and
                 * destination pointers. Note that when we get here, C = 0
                 */
.Lless8:        teq     len, #0                         @ check for zero count
                beq     .Lzero

                /* we must have at least one byte. */
                tst     dst, #1                         @ dst 16-bit aligned
                beq     .Lless8_aligned

                /* Align dst */
                load1b  ip
                sub     len, len, #1
                adcs    sum, sum, ip, put_byte_1        @ update checksum
                strb    ip, [dst], #1
                tst     len, #6
                beq     .Lless8_byteonly

1:              load2b  r8, ip
                sub     len, len, #2
                adcs    sum, sum, r8, put_byte_0
                strb    r8, [dst], #1
                adcs    sum, sum, ip, put_byte_1
                strb    ip, [dst], #1
.Lless8_aligned:
                tst     len, #6
                bne     1b
.Lless8_byteonly:
                tst     len, #1
                beq     .Ldone
                load1b  r8
                adcs    sum, sum, r8, put_byte_0        @ update checksum
                strb    r8, [dst], #1
                b       .Ldone

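/*
 * Entry point.  FN_ENTRY/FN_EXIT, save_regs/load_regs and the
 * load1b/load2b/load1l/load2l/load4l macros are provided by the file
 * that includes this one, so the same body can be reused for the
 * different csum_partial_copy variants.
 */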
FN_ENTRY
                save_regs

                cmp     len, #8                 @ Ensure that we have at least
                blo     .Lless8                 @ 8 bytes to copy.

                adds    sum, sum, #0            @ C = 0
                tst     dst, #3                 @ Test destination alignment
                blne    .Ldst_unaligned         @ align destination, return here

                /*
                 * Ok, the dst pointer is now 32bit aligned, and we know
                 * that we must have more than 4 bytes to copy. Note
                 * that C contains the carry from the dst alignment above.
                 */

                tst     src, #3                 @ Test source alignment
                bne     .Lsrc_not_aligned

                /* Routine for src & dst aligned */

                bics    ip, len, #15
                beq     2f

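                /*
                 * Main loop: copy 16 bytes per iteration, adding each
                 * word into the running ones'-complement sum with adcs.
                 */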
1:              load4l  r4, r5, r6, r7
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b

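                /* Fewer than 16 bytes left: handle the 8- and 4-byte chunks. */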
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r4, r5
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                tst     ip, #4
                beq     4f

3:              load1l  r4
                str     r4, [dst], #4
                adcs    sum, sum, r4

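                /* 0-3 trailing bytes: load one last word and store only the bytes still needed. */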
4:              ands    len, len, #3
                beq     .Ldone
                load1l  r4
                tst     len, #2
                mov     r5, r4, get_byte_0
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
.Lexit:         tst     len, #1
                strneb  r5, [dst], #1
                andne   r5, r5, #255
                adcnes  sum, sum, r5, put_byte_0

                /*
                 * If the dst pointer was not 16-bit aligned, we
                 * need to rotate the checksum here to get around
                 * the inefficient byte manipulations in the
                 * architecture independent code.
                 */
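                /*
                 * (Rotating a ones'-complement accumulator by 8 bits
                 * byte-swaps the folded 16-bit checksum, which is what
                 * storing the data at an odd offset requires.)
                 */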
.Ldone:         adc     r0, sum, #0
                ldr     sum, [sp, #0]           @ dst
                tst     sum, #1
                movne   r0, r0, ror #8
                load_regs

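                /*
                 * Source not 32-bit aligned: round src down to a word
                 * boundary and rebuild each word to be stored from two
                 * adjacent source words, using the endian-agnostic
                 * lspull/lspush shifts.  Three variants follow, for
                 * source offsets of 1, 2 and 3 bytes.
                 */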
.Lsrc_not_aligned:
                adc     sum, sum, #0            @ include C from dst alignment
                and     ip, src, #3
                bic     src, src, #3
                load1l  r5
                cmp     ip, #2
                beq     .Lsrc2_aligned
                bhi     .Lsrc3_aligned
                mov     r4, r5, lspull #8       @ C = 0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                mov     r6, r6, lspull #8
                orr     r6, r6, r7, lspush #24
                mov     r7, r7, lspull #8
                orr     r7, r7, r8, lspush #24
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #8
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #24
                mov     r5, r5, lspull #8
                orr     r5, r5, r6, lspush #24
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #8
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #24
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #8
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4, lspush #16
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_2
                b       .Lexit

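                /* Source offset 2: same structure, combining words with 16-bit shifts. */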
.Lsrc2_aligned: mov     r4, r5, lspull #16
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                mov     r6, r6, lspull #16
                orr     r6, r6, r7, lspush #16
                mov     r7, r7, lspull #16
                orr     r7, r7, r8, lspush #16
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #16
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #16
                mov     r5, r5, lspull #16
                orr     r5, r5, r6, lspush #16
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #16
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #16
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #16
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                adcs    sum, sum, r4
                strb    r5, [dst], #1
                mov     r5, r4, get_byte_1
                strb    r5, [dst], #1
                tst     len, #1
                beq     .Ldone
                load1b  r5
                b       .Lexit

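                /* Source offset 3: same structure, combining words with 24- and 8-bit shifts. */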
.Lsrc3_aligned: mov     r4, r5, lspull #24
                adds    sum, sum, #0
                bics    ip, len, #15
                beq     2f
1:              load4l  r5, r6, r7, r8
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                mov     r6, r6, lspull #24
                orr     r6, r6, r7, lspush #8
                mov     r7, r7, lspull #24
                orr     r7, r7, r8, lspush #8
                stmia   dst!, {r4, r5, r6, r7}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                adcs    sum, sum, r6
                adcs    sum, sum, r7
                mov     r4, r8, lspull #24
                sub     ip, ip, #16
                teq     ip, #0
                bne     1b
2:              ands    ip, len, #12
                beq     4f
                tst     ip, #8
                beq     3f
                load2l  r5, r6
                orr     r4, r4, r5, lspush #8
                mov     r5, r5, lspull #24
                orr     r5, r5, r6, lspush #8
                stmia   dst!, {r4, r5}
                adcs    sum, sum, r4
                adcs    sum, sum, r5
                mov     r4, r6, lspull #24
                tst     ip, #4
                beq     4f
3:              load1l  r5
                orr     r4, r4, r5, lspush #8
                str     r4, [dst], #4
                adcs    sum, sum, r4
                mov     r4, r5, lspull #24
4:              ands    len, len, #3
                beq     .Ldone
                mov     r5, r4, get_byte_0
                tst     len, #2
                beq     .Lexit
                strb    r5, [dst], #1
                adcs    sum, sum, r4
                load1l  r4
                mov     r5, r4, get_byte_0
                strb    r5, [dst], #1
                adcs    sum, sum, r4, lspush #24
                mov     r5, r4, get_byte_1
                b       .Lexit
FN_EXIT
FN_EXPORT