Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * include/asm-alpha/xor.h | |
3 | * | |
4 | * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2, or (at your option) | |
9 | * any later version. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | |
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14 | */ | |
15 | ||
/*
 * Entry points for the XOR routines whose bodies live in the assembly block
 * further down in this file.  The parameter names are documentation only:
 * "bytes" is presumably a byte count (the assembly shifts it right by 6 and
 * then walks the buffers in 64-byte steps) and p1..p5 are the buffers, of
 * which the assembly stores only through the first.
 */
extern void xor_alpha_2(unsigned long bytes, unsigned long *p1,
			unsigned long *p2);
extern void xor_alpha_3(unsigned long bytes, unsigned long *p1,
			unsigned long *p2, unsigned long *p3);
extern void xor_alpha_4(unsigned long bytes, unsigned long *p1,
			unsigned long *p2, unsigned long *p3,
			unsigned long *p4);
extern void xor_alpha_5(unsigned long bytes, unsigned long *p1,
			unsigned long *p2, unsigned long *p3,
			unsigned long *p4, unsigned long *p5);

/* Same signatures; these variants also issue look-ahead loads into $31. */
extern void xor_alpha_prefetch_2(unsigned long bytes, unsigned long *p1,
				 unsigned long *p2);
extern void xor_alpha_prefetch_3(unsigned long bytes, unsigned long *p1,
				 unsigned long *p2, unsigned long *p3);
extern void xor_alpha_prefetch_4(unsigned long bytes, unsigned long *p1,
				 unsigned long *p2, unsigned long *p3,
				 unsigned long *p4);
extern void xor_alpha_prefetch_5(unsigned long bytes, unsigned long *p1,
				 unsigned long *p2, unsigned long *p3,
				 unsigned long *p4, unsigned long *p5);
34 | ||
/*
 * File-scope assembly defining the eight routines declared above:
 * xor_alpha_2..5 and xor_alpha_prefetch_2..5, all placed in .text.
 *
 * Structure shared by every routine:
 *   - $16 is shifted right by 6 on entry, turning it into a count of
 *     64-byte chunks.
 *   - $17..$21 (as many as the routine's numeric suffix) are base pointers.
 *     Each pass through the loop loads the eight quadwords at offsets
 *     0..56 from every pointer, XORs the corresponding quadwords together
 *     and stores the eight results through $17 only.
 *   - Every pointer advances by 64 and $16 drops by 1 per pass.  The loop
 *     is bottom-tested (subq ... bgt), so the body always runs at least
 *     once.
 *
 * The _prefetch_ variants add loads whose destination is $31: at offsets
 * 0, 64, 128 and 192 from each pointer before the loop, and at offset 256
 * from each pointer inside it.  NOTE(review): presumably these serve only
 * as cache prefetches ($31 being the Alpha zero register) - confirm; they
 * address memory up to 256 bytes beyond the chunk currently processed.
 *
 * The numeric labels 2: .. 5: are reused by the prefetch variants; each
 * "bgt $16,Nb" branches back to the nearest preceding definition of N.
 * The "# N cycles from $R load" remarks are the original scheduling notes.
 * NOTE(review): the 5-way routines use $28 as a scratch register - check
 * whether the assembler expects ".set noat" for that.
 */
35 | asm(" \n\ | |
36 | .text \n\ | |
37 | .align 3 \n\ | |
38 | .ent xor_alpha_2 \n\ | |
39 | xor_alpha_2: \n\ | |
40 | .prologue 0 \n\ | |
41 | srl $16, 6, $16 \n\ | |
42 | .align 4 \n\ | |
43 | 2: \n\ | |
44 | ldq $0,0($17) \n\ | |
45 | ldq $1,0($18) \n\ | |
46 | ldq $2,8($17) \n\ | |
47 | ldq $3,8($18) \n\ | |
48 | \n\ | |
49 | ldq $4,16($17) \n\ | |
50 | ldq $5,16($18) \n\ | |
51 | ldq $6,24($17) \n\ | |
52 | ldq $7,24($18) \n\ | |
53 | \n\ | |
54 | ldq $19,32($17) \n\ | |
55 | ldq $20,32($18) \n\ | |
56 | ldq $21,40($17) \n\ | |
57 | ldq $22,40($18) \n\ | |
58 | \n\ | |
59 | ldq $23,48($17) \n\ | |
60 | ldq $24,48($18) \n\ | |
61 | ldq $25,56($17) \n\ | |
62 | xor $0,$1,$0 # 7 cycles from $1 load \n\ | |
63 | \n\ | |
64 | ldq $27,56($18) \n\ | |
65 | xor $2,$3,$2 \n\ | |
66 | stq $0,0($17) \n\ | |
67 | xor $4,$5,$4 \n\ | |
68 | \n\ | |
69 | stq $2,8($17) \n\ | |
70 | xor $6,$7,$6 \n\ | |
71 | stq $4,16($17) \n\ | |
72 | xor $19,$20,$19 \n\ | |
73 | \n\ | |
74 | stq $6,24($17) \n\ | |
75 | xor $21,$22,$21 \n\ | |
76 | stq $19,32($17) \n\ | |
77 | xor $23,$24,$23 \n\ | |
78 | \n\ | |
79 | stq $21,40($17) \n\ | |
80 | xor $25,$27,$25 \n\ | |
81 | stq $23,48($17) \n\ | |
82 | subq $16,1,$16 \n\ | |
83 | \n\ | |
84 | stq $25,56($17) \n\ | |
85 | addq $17,64,$17 \n\ | |
86 | addq $18,64,$18 \n\ | |
87 | bgt $16,2b \n\ | |
88 | \n\ | |
89 | ret \n\ | |
90 | .end xor_alpha_2 \n\ | |
91 | \n\ | |
92 | .align 3 \n\ | |
93 | .ent xor_alpha_3 \n\ | |
94 | xor_alpha_3: \n\ | |
95 | .prologue 0 \n\ | |
96 | srl $16, 6, $16 \n\ | |
97 | .align 4 \n\ | |
98 | 3: \n\ | |
99 | ldq $0,0($17) \n\ | |
100 | ldq $1,0($18) \n\ | |
101 | ldq $2,0($19) \n\ | |
102 | ldq $3,8($17) \n\ | |
103 | \n\ | |
104 | ldq $4,8($18) \n\ | |
105 | ldq $6,16($17) \n\ | |
106 | ldq $7,16($18) \n\ | |
107 | ldq $21,24($17) \n\ | |
108 | \n\ | |
109 | ldq $22,24($18) \n\ | |
110 | ldq $24,32($17) \n\ | |
111 | ldq $25,32($18) \n\ | |
112 | ldq $5,8($19) \n\ | |
113 | \n\ | |
114 | ldq $20,16($19) \n\ | |
115 | ldq $23,24($19) \n\ | |
116 | ldq $27,32($19) \n\ | |
117 | nop \n\ | |
118 | \n\ | |
119 | xor $0,$1,$1 # 8 cycles from $0 load \n\ | |
120 | xor $3,$4,$4 # 6 cycles from $4 load \n\ | |
121 | xor $6,$7,$7 # 6 cycles from $7 load \n\ | |
122 | xor $21,$22,$22 # 5 cycles from $22 load \n\ | |
123 | \n\ | |
124 | xor $1,$2,$2 # 9 cycles from $2 load \n\ | |
125 | xor $24,$25,$25 # 5 cycles from $25 load \n\ | |
126 | stq $2,0($17) \n\ | |
127 | xor $4,$5,$5 # 6 cycles from $5 load \n\ | |
128 | \n\ | |
129 | stq $5,8($17) \n\ | |
130 | xor $7,$20,$20 # 7 cycles from $20 load \n\ | |
131 | stq $20,16($17) \n\ | |
132 | xor $22,$23,$23 # 7 cycles from $23 load \n\ | |
133 | \n\ | |
134 | stq $23,24($17) \n\ | |
135 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | |
136 | stq $27,32($17) \n\ | |
137 | nop \n\ | |
138 | \n\ | |
139 | ldq $0,40($17) \n\ | |
140 | ldq $1,40($18) \n\ | |
141 | ldq $3,48($17) \n\ | |
142 | ldq $4,48($18) \n\ | |
143 | \n\ | |
144 | ldq $6,56($17) \n\ | |
145 | ldq $7,56($18) \n\ | |
146 | ldq $2,40($19) \n\ | |
147 | ldq $5,48($19) \n\ | |
148 | \n\ | |
149 | ldq $20,56($19) \n\ | |
150 | xor $0,$1,$1 # 4 cycles from $1 load \n\ | |
151 | xor $3,$4,$4 # 5 cycles from $4 load \n\ | |
152 | xor $6,$7,$7 # 5 cycles from $7 load \n\ | |
153 | \n\ | |
154 | xor $1,$2,$2 # 4 cycles from $2 load \n\ | |
155 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | |
156 | stq $2,40($17) \n\ | |
157 | xor $7,$20,$20 # 4 cycles from $20 load \n\ | |
158 | \n\ | |
159 | stq $5,48($17) \n\ | |
160 | subq $16,1,$16 \n\ | |
161 | stq $20,56($17) \n\ | |
162 | addq $19,64,$19 \n\ | |
163 | \n\ | |
164 | addq $18,64,$18 \n\ | |
165 | addq $17,64,$17 \n\ | |
166 | bgt $16,3b \n\ | |
167 | ret \n\ | |
168 | .end xor_alpha_3 \n\ | |
169 | \n\ | |
170 | .align 3 \n\ | |
171 | .ent xor_alpha_4 \n\ | |
172 | xor_alpha_4: \n\ | |
173 | .prologue 0 \n\ | |
174 | srl $16, 6, $16 \n\ | |
175 | .align 4 \n\ | |
176 | 4: \n\ | |
177 | ldq $0,0($17) \n\ | |
178 | ldq $1,0($18) \n\ | |
179 | ldq $2,0($19) \n\ | |
180 | ldq $3,0($20) \n\ | |
181 | \n\ | |
182 | ldq $4,8($17) \n\ | |
183 | ldq $5,8($18) \n\ | |
184 | ldq $6,8($19) \n\ | |
185 | ldq $7,8($20) \n\ | |
186 | \n\ | |
187 | ldq $21,16($17) \n\ | |
188 | ldq $22,16($18) \n\ | |
189 | ldq $23,16($19) \n\ | |
190 | ldq $24,16($20) \n\ | |
191 | \n\ | |
192 | ldq $25,24($17) \n\ | |
193 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
194 | ldq $27,24($18) \n\ | |
195 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | |
196 | \n\ | |
197 | ldq $0,24($19) \n\ | |
198 | xor $1,$3,$3 \n\ | |
199 | ldq $1,24($20) \n\ | |
200 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | |
201 | \n\ | |
202 | stq $3,0($17) \n\ | |
203 | xor $6,$7,$7 \n\ | |
204 | xor $21,$22,$22 # 7 cycles from $22 load \n\ | |
205 | xor $5,$7,$7 \n\ | |
206 | \n\ | |
207 | stq $7,8($17) \n\ | |
208 | xor $23,$24,$24 # 7 cycles from $24 load \n\ | |
209 | ldq $2,32($17) \n\ | |
210 | xor $22,$24,$24 \n\ | |
211 | \n\ | |
212 | ldq $3,32($18) \n\ | |
213 | ldq $4,32($19) \n\ | |
214 | ldq $5,32($20) \n\ | |
215 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | |
216 | \n\ | |
217 | ldq $6,40($17) \n\ | |
218 | ldq $7,40($18) \n\ | |
219 | ldq $21,40($19) \n\ | |
220 | ldq $22,40($20) \n\ | |
221 | \n\ | |
222 | stq $24,16($17) \n\ | |
223 | xor $0,$1,$1 # 9 cycles from $1 load \n\ | |
224 | xor $2,$3,$3 # 5 cycles from $3 load \n\ | |
225 | xor $27,$1,$1 \n\ | |
226 | \n\ | |
227 | stq $1,24($17) \n\ | |
228 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | |
229 | ldq $23,48($17) \n\ | |
230 | ldq $24,48($18) \n\ | |
231 | \n\ | |
232 | ldq $25,48($19) \n\ | |
233 | xor $3,$5,$5 \n\ | |
234 | ldq $27,48($20) \n\ | |
235 | ldq $0,56($17) \n\ | |
236 | \n\ | |
237 | ldq $1,56($18) \n\ | |
238 | ldq $2,56($19) \n\ | |
239 | xor $6,$7,$7 # 8 cycles from $6 load \n\ | |
240 | ldq $3,56($20) \n\ | |
241 | \n\ | |
242 | stq $5,32($17) \n\ | |
243 | xor $21,$22,$22 # 8 cycles from $22 load \n\ | |
244 | xor $7,$22,$22 \n\ | |
245 | xor $23,$24,$24 # 5 cycles from $24 load \n\ | |
246 | \n\ | |
247 | stq $22,40($17) \n\ | |
248 | xor $25,$27,$27 # 5 cycles from $27 load \n\ | |
249 | xor $24,$27,$27 \n\ | |
250 | xor $0,$1,$1 # 5 cycles from $1 load \n\ | |
251 | \n\ | |
252 | stq $27,48($17) \n\ | |
253 | xor $2,$3,$3 # 4 cycles from $3 load \n\ | |
254 | xor $1,$3,$3 \n\ | |
255 | subq $16,1,$16 \n\ | |
256 | \n\ | |
257 | stq $3,56($17) \n\ | |
258 | addq $20,64,$20 \n\ | |
259 | addq $19,64,$19 \n\ | |
260 | addq $18,64,$18 \n\ | |
261 | \n\ | |
262 | addq $17,64,$17 \n\ | |
263 | bgt $16,4b \n\ | |
264 | ret \n\ | |
265 | .end xor_alpha_4 \n\ | |
266 | \n\ | |
267 | .align 3 \n\ | |
268 | .ent xor_alpha_5 \n\ | |
269 | xor_alpha_5: \n\ | |
270 | .prologue 0 \n\ | |
271 | srl $16, 6, $16 \n\ | |
272 | .align 4 \n\ | |
273 | 5: \n\ | |
274 | ldq $0,0($17) \n\ | |
275 | ldq $1,0($18) \n\ | |
276 | ldq $2,0($19) \n\ | |
277 | ldq $3,0($20) \n\ | |
278 | \n\ | |
279 | ldq $4,0($21) \n\ | |
280 | ldq $5,8($17) \n\ | |
281 | ldq $6,8($18) \n\ | |
282 | ldq $7,8($19) \n\ | |
283 | \n\ | |
284 | ldq $22,8($20) \n\ | |
285 | ldq $23,8($21) \n\ | |
286 | ldq $24,16($17) \n\ | |
287 | ldq $25,16($18) \n\ | |
288 | \n\ | |
289 | ldq $27,16($19) \n\ | |
290 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
291 | ldq $28,16($20) \n\ | |
292 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | |
293 | \n\ | |
294 | ldq $0,16($21) \n\ | |
295 | xor $1,$3,$3 \n\ | |
296 | ldq $1,24($17) \n\ | |
297 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | |
298 | \n\ | |
299 | stq $4,0($17) \n\ | |
300 | xor $5,$6,$6 # 7 cycles from $6 load \n\ | |
301 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
302 | xor $6,$23,$23 # 7 cycles from $23 load \n\ | |
303 | \n\ | |
304 | ldq $2,24($18) \n\ | |
305 | xor $22,$23,$23 \n\ | |
306 | ldq $3,24($19) \n\ | |
307 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | |
308 | \n\ | |
309 | stq $23,8($17) \n\ | |
310 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | |
311 | ldq $4,24($20) \n\ | |
312 | xor $28,$0,$0 # 7 cycles from $0 load \n\ | |
313 | \n\ | |
314 | ldq $5,24($21) \n\ | |
315 | xor $27,$0,$0 \n\ | |
316 | ldq $6,32($17) \n\ | |
317 | ldq $7,32($18) \n\ | |
318 | \n\ | |
319 | stq $0,16($17) \n\ | |
320 | xor $1,$2,$2 # 6 cycles from $2 load \n\ | |
321 | ldq $22,32($19) \n\ | |
322 | xor $3,$4,$4 # 4 cycles from $4 load \n\ | |
323 | \n\ | |
324 | ldq $23,32($20) \n\ | |
325 | xor $2,$4,$4 \n\ | |
326 | ldq $24,32($21) \n\ | |
327 | ldq $25,40($17) \n\ | |
328 | \n\ | |
329 | ldq $27,40($18) \n\ | |
330 | ldq $28,40($19) \n\ | |
331 | ldq $0,40($20) \n\ | |
332 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | |
333 | \n\ | |
334 | stq $5,24($17) \n\ | |
335 | xor $6,$7,$7 # 7 cycles from $7 load \n\ | |
336 | ldq $1,40($21) \n\ | |
337 | ldq $2,48($17) \n\ | |
338 | \n\ | |
339 | ldq $3,48($18) \n\ | |
340 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
341 | ldq $4,48($19) \n\ | |
342 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | |
343 | \n\ | |
344 | ldq $5,48($20) \n\ | |
345 | xor $22,$24,$24 \n\ | |
346 | ldq $6,48($21) \n\ | |
347 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | |
348 | \n\ | |
349 | stq $24,32($17) \n\ | |
350 | xor $27,$28,$28 # 8 cycles from $28 load \n\ | |
351 | ldq $7,56($17) \n\ | |
352 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
353 | \n\ | |
354 | ldq $22,56($18) \n\ | |
355 | ldq $23,56($19) \n\ | |
356 | ldq $24,56($20) \n\ | |
357 | ldq $25,56($21) \n\ | |
358 | \n\ | |
359 | xor $28,$1,$1 \n\ | |
360 | xor $2,$3,$3 # 9 cycles from $3 load \n\ | |
361 | xor $3,$4,$4 # 9 cycles from $4 load \n\ | |
362 | xor $5,$6,$6 # 8 cycles from $6 load \n\ | |
363 | \n\ | |
364 | stq $1,40($17) \n\ | |
365 | xor $4,$6,$6 \n\ | |
366 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
367 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | |
368 | \n\ | |
369 | stq $6,48($17) \n\ | |
370 | xor $22,$24,$24 \n\ | |
371 | subq $16,1,$16 \n\ | |
372 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | |
373 | \n\ | |
374 | stq $25,56($17) \n\ | |
375 | addq $21,64,$21 \n\ | |
376 | addq $20,64,$20 \n\ | |
377 | addq $19,64,$19 \n\ | |
378 | \n\ | |
379 | addq $18,64,$18 \n\ | |
380 | addq $17,64,$17 \n\ | |
381 | bgt $16,5b \n\ | |
382 | ret \n\ | |
383 | .end xor_alpha_5 \n\ | |
384 | \n\ | |
385 | .align 3 \n\ | |
386 | .ent xor_alpha_prefetch_2 \n\ | |
387 | xor_alpha_prefetch_2: \n\ | |
388 | .prologue 0 \n\ | |
389 | srl $16, 6, $16 \n\ | |
390 | \n\ | |
391 | ldq $31, 0($17) \n\ | |
392 | ldq $31, 0($18) \n\ | |
393 | \n\ | |
394 | ldq $31, 64($17) \n\ | |
395 | ldq $31, 64($18) \n\ | |
396 | \n\ | |
397 | ldq $31, 128($17) \n\ | |
398 | ldq $31, 128($18) \n\ | |
399 | \n\ | |
400 | ldq $31, 192($17) \n\ | |
401 | ldq $31, 192($18) \n\ | |
402 | .align 4 \n\ | |
403 | 2: \n\ | |
404 | ldq $0,0($17) \n\ | |
405 | ldq $1,0($18) \n\ | |
406 | ldq $2,8($17) \n\ | |
407 | ldq $3,8($18) \n\ | |
408 | \n\ | |
409 | ldq $4,16($17) \n\ | |
410 | ldq $5,16($18) \n\ | |
411 | ldq $6,24($17) \n\ | |
412 | ldq $7,24($18) \n\ | |
413 | \n\ | |
414 | ldq $19,32($17) \n\ | |
415 | ldq $20,32($18) \n\ | |
416 | ldq $21,40($17) \n\ | |
417 | ldq $22,40($18) \n\ | |
418 | \n\ | |
419 | ldq $23,48($17) \n\ | |
420 | ldq $24,48($18) \n\ | |
421 | ldq $25,56($17) \n\ | |
422 | ldq $27,56($18) \n\ | |
423 | \n\ | |
424 | ldq $31,256($17) \n\ | |
425 | xor $0,$1,$0 # 8 cycles from $1 load \n\ | |
426 | ldq $31,256($18) \n\ | |
427 | xor $2,$3,$2 \n\ | |
428 | \n\ | |
429 | stq $0,0($17) \n\ | |
430 | xor $4,$5,$4 \n\ | |
431 | stq $2,8($17) \n\ | |
432 | xor $6,$7,$6 \n\ | |
433 | \n\ | |
434 | stq $4,16($17) \n\ | |
435 | xor $19,$20,$19 \n\ | |
436 | stq $6,24($17) \n\ | |
437 | xor $21,$22,$21 \n\ | |
438 | \n\ | |
439 | stq $19,32($17) \n\ | |
440 | xor $23,$24,$23 \n\ | |
441 | stq $21,40($17) \n\ | |
442 | xor $25,$27,$25 \n\ | |
443 | \n\ | |
444 | stq $23,48($17) \n\ | |
445 | subq $16,1,$16 \n\ | |
446 | stq $25,56($17) \n\ | |
447 | addq $17,64,$17 \n\ | |
448 | \n\ | |
449 | addq $18,64,$18 \n\ | |
450 | bgt $16,2b \n\ | |
451 | ret \n\ | |
452 | .end xor_alpha_prefetch_2 \n\ | |
453 | \n\ | |
454 | .align 3 \n\ | |
455 | .ent xor_alpha_prefetch_3 \n\ | |
456 | xor_alpha_prefetch_3: \n\ | |
457 | .prologue 0 \n\ | |
458 | srl $16, 6, $16 \n\ | |
459 | \n\ | |
460 | ldq $31, 0($17) \n\ | |
461 | ldq $31, 0($18) \n\ | |
462 | ldq $31, 0($19) \n\ | |
463 | \n\ | |
464 | ldq $31, 64($17) \n\ | |
465 | ldq $31, 64($18) \n\ | |
466 | ldq $31, 64($19) \n\ | |
467 | \n\ | |
468 | ldq $31, 128($17) \n\ | |
469 | ldq $31, 128($18) \n\ | |
470 | ldq $31, 128($19) \n\ | |
471 | \n\ | |
472 | ldq $31, 192($17) \n\ | |
473 | ldq $31, 192($18) \n\ | |
474 | ldq $31, 192($19) \n\ | |
475 | .align 4 \n\ | |
476 | 3: \n\ | |
477 | ldq $0,0($17) \n\ | |
478 | ldq $1,0($18) \n\ | |
479 | ldq $2,0($19) \n\ | |
480 | ldq $3,8($17) \n\ | |
481 | \n\ | |
482 | ldq $4,8($18) \n\ | |
483 | ldq $6,16($17) \n\ | |
484 | ldq $7,16($18) \n\ | |
485 | ldq $21,24($17) \n\ | |
486 | \n\ | |
487 | ldq $22,24($18) \n\ | |
488 | ldq $24,32($17) \n\ | |
489 | ldq $25,32($18) \n\ | |
490 | ldq $5,8($19) \n\ | |
491 | \n\ | |
492 | ldq $20,16($19) \n\ | |
493 | ldq $23,24($19) \n\ | |
494 | ldq $27,32($19) \n\ | |
495 | nop \n\ | |
496 | \n\ | |
497 | xor $0,$1,$1 # 8 cycles from $0 load \n\ | |
498 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | |
499 | xor $6,$7,$7 # 6 cycles from $7 load \n\ | |
500 | xor $21,$22,$22 # 5 cycles from $22 load \n\ | |
501 | \n\ | |
502 | xor $1,$2,$2 # 9 cycles from $2 load \n\ | |
503 | xor $24,$25,$25 # 5 cycles from $25 load \n\ | |
504 | stq $2,0($17) \n\ | |
505 | xor $4,$5,$5 # 6 cycles from $5 load \n\ | |
506 | \n\ | |
507 | stq $5,8($17) \n\ | |
508 | xor $7,$20,$20 # 7 cycles from $20 load \n\ | |
509 | stq $20,16($17) \n\ | |
510 | xor $22,$23,$23 # 7 cycles from $23 load \n\ | |
511 | \n\ | |
512 | stq $23,24($17) \n\ | |
513 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | |
514 | stq $27,32($17) \n\ | |
515 | nop \n\ | |
516 | \n\ | |
517 | ldq $0,40($17) \n\ | |
518 | ldq $1,40($18) \n\ | |
519 | ldq $3,48($17) \n\ | |
520 | ldq $4,48($18) \n\ | |
521 | \n\ | |
522 | ldq $6,56($17) \n\ | |
523 | ldq $7,56($18) \n\ | |
524 | ldq $2,40($19) \n\ | |
525 | ldq $5,48($19) \n\ | |
526 | \n\ | |
527 | ldq $20,56($19) \n\ | |
528 | ldq $31,256($17) \n\ | |
529 | ldq $31,256($18) \n\ | |
530 | ldq $31,256($19) \n\ | |
531 | \n\ | |
532 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
533 | xor $3,$4,$4 # 5 cycles from $4 load \n\ | |
534 | xor $6,$7,$7 # 5 cycles from $7 load \n\ | |
535 | xor $1,$2,$2 # 4 cycles from $2 load \n\ | |
536 | \n\ | |
537 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | |
538 | xor $7,$20,$20 # 4 cycles from $20 load \n\ | |
539 | stq $2,40($17) \n\ | |
540 | subq $16,1,$16 \n\ | |
541 | \n\ | |
542 | stq $5,48($17) \n\ | |
543 | addq $19,64,$19 \n\ | |
544 | stq $20,56($17) \n\ | |
545 | addq $18,64,$18 \n\ | |
546 | \n\ | |
547 | addq $17,64,$17 \n\ | |
548 | bgt $16,3b \n\ | |
549 | ret \n\ | |
550 | .end xor_alpha_prefetch_3 \n\ | |
551 | \n\ | |
552 | .align 3 \n\ | |
553 | .ent xor_alpha_prefetch_4 \n\ | |
554 | xor_alpha_prefetch_4: \n\ | |
555 | .prologue 0 \n\ | |
556 | srl $16, 6, $16 \n\ | |
557 | \n\ | |
558 | ldq $31, 0($17) \n\ | |
559 | ldq $31, 0($18) \n\ | |
560 | ldq $31, 0($19) \n\ | |
561 | ldq $31, 0($20) \n\ | |
562 | \n\ | |
563 | ldq $31, 64($17) \n\ | |
564 | ldq $31, 64($18) \n\ | |
565 | ldq $31, 64($19) \n\ | |
566 | ldq $31, 64($20) \n\ | |
567 | \n\ | |
568 | ldq $31, 128($17) \n\ | |
569 | ldq $31, 128($18) \n\ | |
570 | ldq $31, 128($19) \n\ | |
571 | ldq $31, 128($20) \n\ | |
572 | \n\ | |
573 | ldq $31, 192($17) \n\ | |
574 | ldq $31, 192($18) \n\ | |
575 | ldq $31, 192($19) \n\ | |
576 | ldq $31, 192($20) \n\ | |
577 | .align 4 \n\ | |
578 | 4: \n\ | |
579 | ldq $0,0($17) \n\ | |
580 | ldq $1,0($18) \n\ | |
581 | ldq $2,0($19) \n\ | |
582 | ldq $3,0($20) \n\ | |
583 | \n\ | |
584 | ldq $4,8($17) \n\ | |
585 | ldq $5,8($18) \n\ | |
586 | ldq $6,8($19) \n\ | |
587 | ldq $7,8($20) \n\ | |
588 | \n\ | |
589 | ldq $21,16($17) \n\ | |
590 | ldq $22,16($18) \n\ | |
591 | ldq $23,16($19) \n\ | |
592 | ldq $24,16($20) \n\ | |
593 | \n\ | |
594 | ldq $25,24($17) \n\ | |
595 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
596 | ldq $27,24($18) \n\ | |
597 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | |
598 | \n\ | |
599 | ldq $0,24($19) \n\ | |
600 | xor $1,$3,$3 \n\ | |
601 | ldq $1,24($20) \n\ | |
602 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | |
603 | \n\ | |
604 | stq $3,0($17) \n\ | |
605 | xor $6,$7,$7 \n\ | |
606 | xor $21,$22,$22 # 7 cycles from $22 load \n\ | |
607 | xor $5,$7,$7 \n\ | |
608 | \n\ | |
609 | stq $7,8($17) \n\ | |
610 | xor $23,$24,$24 # 7 cycles from $24 load \n\ | |
611 | ldq $2,32($17) \n\ | |
612 | xor $22,$24,$24 \n\ | |
613 | \n\ | |
614 | ldq $3,32($18) \n\ | |
615 | ldq $4,32($19) \n\ | |
616 | ldq $5,32($20) \n\ | |
617 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | |
618 | \n\ | |
619 | ldq $6,40($17) \n\ | |
620 | ldq $7,40($18) \n\ | |
621 | ldq $21,40($19) \n\ | |
622 | ldq $22,40($20) \n\ | |
623 | \n\ | |
624 | stq $24,16($17) \n\ | |
625 | xor $0,$1,$1 # 9 cycles from $1 load \n\ | |
626 | xor $2,$3,$3 # 5 cycles from $3 load \n\ | |
627 | xor $27,$1,$1 \n\ | |
628 | \n\ | |
629 | stq $1,24($17) \n\ | |
630 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | |
631 | ldq $23,48($17) \n\ | |
632 | xor $3,$5,$5 \n\ | |
633 | \n\ | |
634 | ldq $24,48($18) \n\ | |
635 | ldq $25,48($19) \n\ | |
636 | ldq $27,48($20) \n\ | |
637 | ldq $0,56($17) \n\ | |
638 | \n\ | |
639 | ldq $1,56($18) \n\ | |
640 | ldq $2,56($19) \n\ | |
641 | ldq $3,56($20) \n\ | |
642 | xor $6,$7,$7 # 8 cycles from $6 load \n\ | |
643 | \n\ | |
644 | ldq $31,256($17) \n\ | |
645 | xor $21,$22,$22 # 8 cycles from $22 load \n\ | |
646 | ldq $31,256($18) \n\ | |
647 | xor $7,$22,$22 \n\ | |
648 | \n\ | |
649 | ldq $31,256($19) \n\ | |
650 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | |
651 | ldq $31,256($20) \n\ | |
652 | xor $25,$27,$27 # 6 cycles from $27 load \n\ | |
653 | \n\ | |
654 | stq $5,32($17) \n\ | |
655 | xor $24,$27,$27 \n\ | |
656 | xor $0,$1,$1 # 7 cycles from $1 load \n\ | |
657 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | |
658 | \n\ | |
659 | stq $22,40($17) \n\ | |
660 | xor $1,$3,$3 \n\ | |
661 | stq $27,48($17) \n\ | |
662 | subq $16,1,$16 \n\ | |
663 | \n\ | |
664 | stq $3,56($17) \n\ | |
665 | addq $20,64,$20 \n\ | |
666 | addq $19,64,$19 \n\ | |
667 | addq $18,64,$18 \n\ | |
668 | \n\ | |
669 | addq $17,64,$17 \n\ | |
670 | bgt $16,4b \n\ | |
671 | ret \n\ | |
672 | .end xor_alpha_prefetch_4 \n\ | |
673 | \n\ | |
674 | .align 3 \n\ | |
675 | .ent xor_alpha_prefetch_5 \n\ | |
676 | xor_alpha_prefetch_5: \n\ | |
677 | .prologue 0 \n\ | |
678 | srl $16, 6, $16 \n\ | |
679 | \n\ | |
680 | ldq $31, 0($17) \n\ | |
681 | ldq $31, 0($18) \n\ | |
682 | ldq $31, 0($19) \n\ | |
683 | ldq $31, 0($20) \n\ | |
684 | ldq $31, 0($21) \n\ | |
685 | \n\ | |
686 | ldq $31, 64($17) \n\ | |
687 | ldq $31, 64($18) \n\ | |
688 | ldq $31, 64($19) \n\ | |
689 | ldq $31, 64($20) \n\ | |
690 | ldq $31, 64($21) \n\ | |
691 | \n\ | |
692 | ldq $31, 128($17) \n\ | |
693 | ldq $31, 128($18) \n\ | |
694 | ldq $31, 128($19) \n\ | |
695 | ldq $31, 128($20) \n\ | |
696 | ldq $31, 128($21) \n\ | |
697 | \n\ | |
698 | ldq $31, 192($17) \n\ | |
699 | ldq $31, 192($18) \n\ | |
700 | ldq $31, 192($19) \n\ | |
701 | ldq $31, 192($20) \n\ | |
702 | ldq $31, 192($21) \n\ | |
703 | .align 4 \n\ | |
704 | 5: \n\ | |
705 | ldq $0,0($17) \n\ | |
706 | ldq $1,0($18) \n\ | |
707 | ldq $2,0($19) \n\ | |
708 | ldq $3,0($20) \n\ | |
709 | \n\ | |
710 | ldq $4,0($21) \n\ | |
711 | ldq $5,8($17) \n\ | |
712 | ldq $6,8($18) \n\ | |
713 | ldq $7,8($19) \n\ | |
714 | \n\ | |
715 | ldq $22,8($20) \n\ | |
716 | ldq $23,8($21) \n\ | |
717 | ldq $24,16($17) \n\ | |
718 | ldq $25,16($18) \n\ | |
719 | \n\ | |
720 | ldq $27,16($19) \n\ | |
721 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
722 | ldq $28,16($20) \n\ | |
723 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | |
724 | \n\ | |
725 | ldq $0,16($21) \n\ | |
726 | xor $1,$3,$3 \n\ | |
727 | ldq $1,24($17) \n\ | |
728 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | |
729 | \n\ | |
730 | stq $4,0($17) \n\ | |
731 | xor $5,$6,$6 # 7 cycles from $6 load \n\ | |
732 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
733 | xor $6,$23,$23 # 7 cycles from $23 load \n\ | |
734 | \n\ | |
735 | ldq $2,24($18) \n\ | |
736 | xor $22,$23,$23 \n\ | |
737 | ldq $3,24($19) \n\ | |
738 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | |
739 | \n\ | |
740 | stq $23,8($17) \n\ | |
741 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | |
742 | ldq $4,24($20) \n\ | |
743 | xor $28,$0,$0 # 7 cycles from $0 load \n\ | |
744 | \n\ | |
745 | ldq $5,24($21) \n\ | |
746 | xor $27,$0,$0 \n\ | |
747 | ldq $6,32($17) \n\ | |
748 | ldq $7,32($18) \n\ | |
749 | \n\ | |
750 | stq $0,16($17) \n\ | |
751 | xor $1,$2,$2 # 6 cycles from $2 load \n\ | |
752 | ldq $22,32($19) \n\ | |
753 | xor $3,$4,$4 # 4 cycles from $4 load \n\ | |
754 | \n\ | |
755 | ldq $23,32($20) \n\ | |
756 | xor $2,$4,$4 \n\ | |
757 | ldq $24,32($21) \n\ | |
758 | ldq $25,40($17) \n\ | |
759 | \n\ | |
760 | ldq $27,40($18) \n\ | |
761 | ldq $28,40($19) \n\ | |
762 | ldq $0,40($20) \n\ | |
763 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | |
764 | \n\ | |
765 | stq $5,24($17) \n\ | |
766 | xor $6,$7,$7 # 7 cycles from $7 load \n\ | |
767 | ldq $1,40($21) \n\ | |
768 | ldq $2,48($17) \n\ | |
769 | \n\ | |
770 | ldq $3,48($18) \n\ | |
771 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
772 | ldq $4,48($19) \n\ | |
773 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | |
774 | \n\ | |
775 | ldq $5,48($20) \n\ | |
776 | xor $22,$24,$24 \n\ | |
777 | ldq $6,48($21) \n\ | |
778 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | |
779 | \n\ | |
780 | stq $24,32($17) \n\ | |
781 | xor $27,$28,$28 # 8 cycles from $28 load \n\ | |
782 | ldq $7,56($17) \n\ | |
783 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | |
784 | \n\ | |
785 | ldq $22,56($18) \n\ | |
786 | ldq $23,56($19) \n\ | |
787 | ldq $24,56($20) \n\ | |
788 | ldq $25,56($21) \n\ | |
789 | \n\ | |
790 | ldq $31,256($17) \n\ | |
791 | xor $28,$1,$1 \n\ | |
792 | ldq $31,256($18) \n\ | |
793 | xor $2,$3,$3 # 9 cycles from $3 load \n\ | |
794 | \n\ | |
795 | ldq $31,256($19) \n\ | |
796 | xor $3,$4,$4 # 9 cycles from $4 load \n\ | |
797 | ldq $31,256($20) \n\ | |
798 | xor $5,$6,$6 # 8 cycles from $6 load \n\ | |
799 | \n\ | |
800 | stq $1,40($17) \n\ | |
801 | xor $4,$6,$6 \n\ | |
802 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | |
803 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | |
804 | \n\ | |
805 | stq $6,48($17) \n\ | |
806 | xor $22,$24,$24 \n\ | |
807 | ldq $31,256($21) \n\ | |
808 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | |
809 | \n\ | |
810 | stq $25,56($17) \n\ | |
811 | subq $16,1,$16 \n\ | |
812 | addq $21,64,$21 \n\ | |
813 | addq $20,64,$20 \n\ | |
814 | \n\ | |
815 | addq $19,64,$19 \n\ | |
816 | addq $18,64,$18 \n\ | |
817 | addq $17,64,$17 \n\ | |
818 | bgt $16,5b \n\ | |
819 | \n\ | |
820 | ret \n\ | |
821 | .end xor_alpha_prefetch_5 \n\ | |
822 | "); | |
823 | ||
824 | static struct xor_block_template xor_block_alpha = { | |
825 | .name = "alpha", | |
826 | .do_2 = xor_alpha_2, | |
827 | .do_3 = xor_alpha_3, | |
828 | .do_4 = xor_alpha_4, | |
829 | .do_5 = xor_alpha_5, | |
830 | }; | |
831 | ||
832 | static struct xor_block_template xor_block_alpha_prefetch = { | |
833 | .name = "alpha prefetch", | |
834 | .do_2 = xor_alpha_prefetch_2, | |
835 | .do_3 = xor_alpha_prefetch_3, | |
836 | .do_4 = xor_alpha_prefetch_4, | |
837 | .do_5 = xor_alpha_prefetch_5, | |
838 | }; | |
839 | ||
840 | /* For grins, also test the generic routines. */ | |
841 | #include <asm-generic/xor.h> | |
842 | ||
/*
 * Replace any earlier XOR_TRY_TEMPLATES with one that hands four templates
 * to xor_speed(), in this order: the two generic ones (8regs, 32regs), then
 * the two Alpha assembly ones (plain, prefetch).
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_alpha);			\
		xor_speed(&xor_block_alpha_prefetch);		\
	} while (0)
851 | ||
/*
 * Pick the template to use.  When implver() reports IMPLVER_EV6 the
 * prefetching template is chosen unconditionally, because it is
 * significantly faster in the cold cache case; otherwise the caller's
 * FASTEST is returned unchanged.
 */
#define XOR_SELECT_TEMPLATE(FASTEST)					\
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)