Linux-2.6.12-rc2
[deliverable/linux.git] / arch / mips / lib-32 / csum_partial.S
1 /*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1998 Ralf Baechle
7 */
8 #include <asm/asm.h>
9 #include <asm/regdef.h>
10
11 #define ADDC(sum,reg) \
12 addu sum, reg; \
13 sltu v1, sum, reg; \
14 addu sum, v1
15
16 #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) \
17 lw t0, (offset + 0x00)(src); \
18 lw t1, (offset + 0x04)(src); \
19 lw t2, (offset + 0x08)(src); \
20 lw t3, (offset + 0x0c)(src); \
21 ADDC(sum, t0); \
22 ADDC(sum, t1); \
23 ADDC(sum, t2); \
24 ADDC(sum, t3); \
25 lw t0, (offset + 0x10)(src); \
26 lw t1, (offset + 0x14)(src); \
27 lw t2, (offset + 0x18)(src); \
28 lw t3, (offset + 0x1c)(src); \
29 ADDC(sum, t0); \
30 ADDC(sum, t1); \
31 ADDC(sum, t2); \
32 ADDC(sum, t3); \
33
34 /*
35 * a0: source address
36 * a1: length of the area to checksum
37 * a2: partial checksum
38 */
39
40 #define src a0
41 #define dest a1
42 #define sum v0
43
44 .text
45 .set noreorder
46
47 /* unknown src alignment and < 8 bytes to go */
48 small_csumcpy:
49 move a1, t2
50
51 andi t0, a1, 4
52 beqz t0, 1f
53 andi t0, a1, 2
54
55 /* Still a full word to go */
56 ulw t1, (src)
57 addiu src, 4
58 ADDC(sum, t1)
59
60 1: move t1, zero
61 beqz t0, 1f
62 andi t0, a1, 1
63
64 /* Still a halfword to go */
65 ulhu t1, (src)
66 addiu src, 2
67
68 1: beqz t0, 1f
69 sll t1, t1, 16
70
71 lbu t2, (src)
72 nop
73
74 #ifdef __MIPSEB__
75 sll t2, t2, 8
76 #endif
77 or t1, t2
78
79 1: ADDC(sum, t1)
80
81 /* fold checksum */
82 sll v1, sum, 16
83 addu sum, v1
84 sltu v1, sum, v1
85 srl sum, sum, 16
86 addu sum, v1
87
88 /* odd buffer alignment? */
89 beqz t7, 1f
90 nop
91 sll v1, sum, 8
92 srl sum, sum, 8
93 or sum, v1
94 andi sum, 0xffff
95 1:
96 .set reorder
97 /* Add the passed partial csum. */
98 ADDC(sum, a2)
99 jr ra
100 .set noreorder
101
102 /* ------------------------------------------------------------------------- */
103
104 .align 5
105 LEAF(csum_partial)
106 move sum, zero
107 move t7, zero
108
109 sltiu t8, a1, 0x8
110 bnez t8, small_csumcpy /* < 8 bytes to copy */
111 move t2, a1
112
113 beqz a1, out
114 andi t7, src, 0x1 /* odd buffer? */
115
116 hword_align:
117 beqz t7, word_align
118 andi t8, src, 0x2
119
120 lbu t0, (src)
121 subu a1, a1, 0x1
122 #ifdef __MIPSEL__
123 sll t0, t0, 8
124 #endif
125 ADDC(sum, t0)
126 addu src, src, 0x1
127 andi t8, src, 0x2
128
129 word_align:
130 beqz t8, dword_align
131 sltiu t8, a1, 56
132
133 lhu t0, (src)
134 subu a1, a1, 0x2
135 ADDC(sum, t0)
136 sltiu t8, a1, 56
137 addu src, src, 0x2
138
139 dword_align:
140 bnez t8, do_end_words
141 move t8, a1
142
143 andi t8, src, 0x4
144 beqz t8, qword_align
145 andi t8, src, 0x8
146
147 lw t0, 0x00(src)
148 subu a1, a1, 0x4
149 ADDC(sum, t0)
150 addu src, src, 0x4
151 andi t8, src, 0x8
152
153 qword_align:
154 beqz t8, oword_align
155 andi t8, src, 0x10
156
157 lw t0, 0x00(src)
158 lw t1, 0x04(src)
159 subu a1, a1, 0x8
160 ADDC(sum, t0)
161 ADDC(sum, t1)
162 addu src, src, 0x8
163 andi t8, src, 0x10
164
165 oword_align:
166 beqz t8, begin_movement
167 srl t8, a1, 0x7
168
169 lw t3, 0x08(src)
170 lw t4, 0x0c(src)
171 lw t0, 0x00(src)
172 lw t1, 0x04(src)
173 ADDC(sum, t3)
174 ADDC(sum, t4)
175 ADDC(sum, t0)
176 ADDC(sum, t1)
177 subu a1, a1, 0x10
178 addu src, src, 0x10
179 srl t8, a1, 0x7
180
181 begin_movement:
182 beqz t8, 1f
183 andi t2, a1, 0x40
184
185 move_128bytes:
186 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
187 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
188 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
189 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
190 subu t8, t8, 0x01
191 bnez t8, move_128bytes
192 addu src, src, 0x80
193
194 1:
195 beqz t2, 1f
196 andi t2, a1, 0x20
197
198 move_64bytes:
199 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
200 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
201 addu src, src, 0x40
202
203 1:
204 beqz t2, do_end_words
205 andi t8, a1, 0x1c
206
207 move_32bytes:
208 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
209 andi t8, a1, 0x1c
210 addu src, src, 0x20
211
212 do_end_words:
213 beqz t8, maybe_end_cruft
214 srl t8, t8, 0x2
215
216 end_words:
217 lw t0, (src)
218 subu t8, t8, 0x1
219 ADDC(sum, t0)
220 bnez t8, end_words
221 addu src, src, 0x4
222
223 maybe_end_cruft:
224 andi t2, a1, 0x3
225
226 small_memcpy:
227 j small_csumcpy; move a1, t2
228 beqz t2, out
229 move a1, t2
230
231 end_bytes:
232 lb t0, (src)
233 subu a1, a1, 0x1
234 bnez a2, end_bytes
235 addu src, src, 0x1
236
237 out:
238 jr ra
239 move v0, sum
240 END(csum_partial)
This page took 0.044721 seconds and 5 git commands to generate.