Commit | Line | Data |
---|---|---|
60468255 JK |
1 | /* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function |
2 | * | |
3 | * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License as published by the Free | |
7 | * Software Foundation; either version 2 of the License, or (at your option) | |
8 | * any later version. | |
9 | */ | |
10 | ||
11 | #include <linux/linkage.h> | |
0777e3e1 | 12 | #include <asm/assembler.h> |
60468255 JK |
13 | |
14 | .syntax unified | |
60468255 JK |
15 | .fpu neon |
16 | ||
17 | .text | |
18 | ||
19 | ||
20 | /* Context structure: byte offsets of the five consecutive 32-bit chaining words h0..h4, relative to the ctx pointer held in RSTATE. Only state_h4 is referenced by name below; h0..h3 are reached through ldm/stm on RSTATE. */ | |
21 | ||
22 | #define state_h0 0 | |
23 | #define state_h1 4 | |
24 | #define state_h2 8 | |
25 | #define state_h3 12 | |
26 | #define state_h4 16 | |
27 | ||
28 | ||
29 | /* Constants: the four SHA-1 round constants. The .LK_VEC table stores each constant four times in a row (16 bytes per constant) so that the two vld1.32 loads in sha1_transform_neon fill all four 32-bit lanes of qK1..qK4 with the same value. */ | |
30 | ||
31 | #define K1 0x5A827999 | |
32 | #define K2 0x6ED9EBA1 | |
33 | #define K3 0x8F1BBCDC | |
34 | #define K4 0xCA62C1D6 | |
35 | .align 4 | |
36 | .LK_VEC: | |
37 | .LK1: .long K1, K1, K1, K1 | |
38 | .LK2: .long K2, K2, K2, K2 | |
39 | .LK3: .long K3, K3, K3, K3 | |
40 | .LK4: .long K4, K4, K4, K4 | |
41 | ||
42 | ||
43 | /* Register macros. Core registers: RSTATE/RDATA/RNBLKS are the three incoming arguments (r0-r2); ROLDSTACK keeps the caller's sp while sp points at the aligned W+K buffer; RWK (lr) is the store pointer into that buffer; _a.._e are the five working variables; RT0..RT3 are scratch. NEON registers: W0..W7 hold the message schedule, four 32-bit words per q register; tmp0..tmp3 are scratch; qK1..qK4 hold the round constants. The W-to-q mapping is deliberately not sequential: W0/W7 are q0/q1 and W6/W5 are q5/q6, i.e. adjacent q registers, matching the two-register lists {W0, W7} and {W6, W5} used by the vld1.32 loads in the 0-15 precalc. */ | |
44 | ||
45 | #define RSTATE r0 | |
46 | #define RDATA r1 | |
47 | #define RNBLKS r2 | |
48 | #define ROLDSTACK r3 | |
49 | #define RWK lr | |
50 | ||
51 | #define _a r4 | |
52 | #define _b r5 | |
53 | #define _c r6 | |
54 | #define _d r7 | |
55 | #define _e r8 | |
56 | ||
57 | #define RT0 r9 | |
58 | #define RT1 r10 | |
59 | #define RT2 r11 | |
60 | #define RT3 r12 | |
61 | ||
62 | #define W0 q0 | |
0777e3e1 | 63 | #define W1 q7 |
60468255 JK |
64 | #define W2 q2 |
65 | #define W3 q3 | |
66 | #define W4 q4 | |
0777e3e1 AB |
67 | #define W5 q6 |
68 | #define W6 q5 | |
69 | #define W7 q1 | |
60468255 JK |
70 | |
71 | #define tmp0 q8 | |
72 | #define tmp1 q9 | |
73 | #define tmp2 q10 | |
74 | #define tmp3 q11 | |
75 | ||
76 | #define qK1 q12 | |
77 | #define qK2 q13 | |
78 | #define qK3 q14 | |
79 | #define qK4 q15 | |
80 | ||
0777e3e1 AB |
/* ARM_LE(code...): emits its argument unchanged unless CONFIG_CPU_BIG_ENDIAN is defined, in which case it emits nothing. Below it wraps the vrev32.8 byte swaps applied to freshly loaded message words. */
81 | #ifdef CONFIG_CPU_BIG_ENDIAN |
82 | #define ARM_LE(code...) | |
83 | #else | |
84 | #define ARM_LE(code...) code | |
85 | #endif | |
60468255 JK |
86 | |
87 | /* Round function macros. WK_offs(i) is the byte offset, from sp, of the precomputed W[i]+K word for round i; the index is reduced modulo 16, so the 64-byte buffer at sp is reused as a 16-entry ring. */ | |
88 | ||
89 | #define WK_offs(i) (((i) & 15) * 4) | |
90 | ||
/* _R_F1: one round using the "choose" function. Effect on the core registers: e += rol(a, 5) + (b & c) + (d & ~b) + WK[i]; b = rol(b, 30). The two AND terms can never both have the same bit set, so they are added rather than ORed. RT3 receives WK[i] from the stack; RT0 and RT1 are clobbered. pre1/pre2/pre3 are macro names invoked with (i16, W, W_m04 .. W_m28) so that message-schedule steps can be interleaved with the scalar instructions. */
91 | #define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
92 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
93 | ldr RT3, [sp, WK_offs(i)]; \ | |
94 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
95 | bic RT0, d, b; \ | |
96 | add e, e, a, ror #(32 - 5); \ | |
97 | and RT1, c, b; \ | |
98 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
99 | add RT0, RT0, RT3; \ | |
100 | add e, e, RT1; \ | |
101 | ror b, #(32 - 30); \ | |
102 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
103 | add e, e, RT0; | |
104 | ||
/* _R_F2: one round using the parity function. Effect on the core registers: e += rol(a, 5) + (b ^ c ^ d) + WK[i]; b = rol(b, 30). RT3 receives WK[i] from the stack; RT0 is clobbered. pre1/pre2/pre3 are invoked exactly as in _R_F1. Note the definition ends with a line-continuation backslash, so the following (otherwise empty) line is part of the macro. */
105 | #define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
106 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
107 | ldr RT3, [sp, WK_offs(i)]; \ | |
108 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
109 | eor RT0, d, b; \ | |
110 | add e, e, a, ror #(32 - 5); \ | |
111 | eor RT0, RT0, c; \ | |
112 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
113 | add e, e, RT3; \ | |
114 | ror b, #(32 - 30); \ | |
115 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
116 | add e, e, RT0; \ | |
117 | ||
/* _R_F3: one round using the majority function. Effect on the core registers: e += rol(a, 5) + (b & c) + ((b ^ c) & d) + WK[i]; b = rol(b, 30). The two masked terms never share a set bit, so their sum equals the majority of b, c and d. RT3 receives WK[i] from the stack; RT0 and RT1 are clobbered. pre1/pre2/pre3 are invoked exactly as in _R_F1. */
118 | #define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
119 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
120 | ldr RT3, [sp, WK_offs(i)]; \ | |
121 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
122 | eor RT0, b, c; \ | |
123 | and RT1, b, c; \ | |
124 | add e, e, a, ror #(32 - 5); \ | |
125 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
126 | and RT0, RT0, d; \ | |
127 | add RT1, RT1, RT3; \ | |
128 | add e, e, RT0; \ | |
129 | ror b, #(32 - 30); \ | |
130 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
131 | add e, e, RT1; | |
132 | ||
/* _R_F4: the fourth round type uses the same parity computation as the second, so it simply forwards every argument to _R_F2. */
133 | #define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
134 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
135 | _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
136 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
137 | ||
/* _R: round dispatcher. The f argument (F1..F4) is token-pasted onto _R_ to select the round macro; all other arguments are forwarded unchanged. */
138 | #define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ | |
139 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
140 | _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
141 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
142 | ||
/* R: a round with no interleaved message-schedule work. It passes dummy for pre1/pre2/pre3; the trailing i16, W, W_m04 .. W_m28 tokens are not parameters of R and are passed through literally, which is harmless because dummy(...) accepts any arguments and expands to nothing. */
143 | #define R(a,b,c,d,e,f,i) \ | |
144 | _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ | |
145 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
146 | ||
147 | #define dummy(...) | |
148 | ||
149 | ||
150 | /* Input expansion macros. W_PRECALC_00_15 prepares rounds 0-15 of a block in one go: it points RWK at the start of the buffer at sp, loads 64 bytes from RDATA (post-incrementing RDATA) into W0, W7, W6, W5 in that order, byte-swaps every 32-bit word on little-endian builds, adds curK to each vector and stores the four sums to the 64-byte buffer. curK must be #defined by the user of the macro. Clobbers tmp0..tmp3 and RWK. */ | |
151 | ||
152 | /********* Precalc macros for rounds 0-15 *************************************/ | |
153 | ||
154 | #define W_PRECALC_00_15() \ | |
155 | add RWK, sp, #(WK_offs(0)); \ | |
156 | \ | |
0777e3e1 AB |
157 | vld1.32 {W0, W7}, [RDATA]!; \ |
158 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ | |
159 | vld1.32 {W6, W5}, [RDATA]!; \ | |
60468255 | 160 | vadd.u32 tmp0, W0, curK; \ |
0777e3e1 AB |
161 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
162 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ | |
60468255 | 163 | vadd.u32 tmp1, W7, curK; \ |
0777e3e1 | 164 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
60468255 JK |
165 | vadd.u32 tmp2, W6, curK; \ |
166 | vst1.32 {tmp0, tmp1}, [RWK]!; \ | |
167 | vadd.u32 tmp3, W5, curK; \ | |
168 | vst1.32 {tmp2, tmp3}, [RWK]; \ | |
169 | ||
/* WPRECALC_00_15_0 .. _12: the same instruction sequence as W_PRECALC_00_15, split into thirteen one-instruction steps so that it can be passed piecewise as pre1/pre3 to the round macros of rounds 64-79 and thereby prepare the next block's rounds 0-15. The macro parameters exist only to match the pre1/pre2/pre3 call signature; none of them is used. Steps must be emitted in numeric order. Each definition ends with a line-continuation backslash, so the line after it belongs to the macro. */
170 | #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 171 | vld1.32 {W0, W7}, [RDATA]!; \ |
60468255 JK |
172 | |
173 | #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
174 | add RWK, sp, #(WK_offs(0)); \ | |
175 | ||
176 | #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 177 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
60468255 JK |
178 | |
179 | #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 180 | vld1.32 {W6, W5}, [RDATA]!; \ |
60468255 JK |
181 | |
182 | #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
183 | vadd.u32 tmp0, W0, curK; \ | |
184 | ||
185 | #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 186 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
60468255 JK |
187 | |
188 | #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 189 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
60468255 JK |
190 | |
191 | #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
192 | vadd.u32 tmp1, W7, curK; \ | |
193 | ||
194 | #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 195 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
60468255 JK |
196 | |
197 | #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
198 | vadd.u32 tmp2, W6, curK; \ | |
199 | ||
200 | #define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
201 | vst1.32 {tmp0, tmp1}, [RWK]!; \ | |
202 | ||
203 | #define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
204 | vadd.u32 tmp3, W5, curK; \ | |
205 | ||
206 | #define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
207 | vst1.32 {tmp2, tmp3}, [RWK]; \ | |
208 | ||
209 | ||
210 | /********* Precalc macros for rounds 16-31 ************************************/ | |
211 | ||
/* WPRECALC_16_31_0 .. _11: twelve slices that together compute four consecutive schedule words and their W+K values; they are handed to four successive round macros as pre1/pre2/pre3 and must run in numeric order. In sequence they: (steps 0-3) form W = W_m16 ^ W_m08 ^ {upper half of W_m16 : lower half of W_m12} ^ {W_m04 moved down one lane, top lane zero}; (steps 4-6) rotate every lane left by 1 using shl #1 | shr #31; (steps 5-9) place the pre-rotation lane 0 into the top lane of tmp1 and XOR it, rotated left by 2, into the top lane of the result, which supplies the term that the zeroed top lane of the shifted W_m04 left out; (step 10) add curK; (step 11) store the four sums at sp + WK_offs(i). The i parameter here receives the round macro's i16 argument. W_m20, W_m24 and W_m28 are not used. Clobbers tmp0, tmp1 and RWK. */
212 | #define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
213 | veor tmp0, tmp0; \ | |
214 | vext.8 W, W_m16, W_m12, #8; \ | |
215 | ||
216 | #define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
217 | add RWK, sp, #(WK_offs(i)); \ | |
218 | vext.8 tmp0, W_m04, tmp0, #4; \ | |
219 | ||
220 | #define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
221 | veor tmp0, tmp0, W_m16; \ | |
222 | veor.32 W, W, W_m08; \ | |
223 | ||
224 | #define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
225 | veor tmp1, tmp1; \ | |
226 | veor W, W, tmp0; \ | |
227 | ||
228 | #define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
229 | vshl.u32 tmp0, W, #1; \ | |
230 | ||
231 | #define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
232 | vext.8 tmp1, tmp1, W, #(16-12); \ | |
233 | vshr.u32 W, W, #31; \ | |
234 | ||
235 | #define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
236 | vorr tmp0, tmp0, W; \ | |
237 | vshr.u32 W, tmp1, #30; \ | |
238 | ||
239 | #define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
240 | vshl.u32 tmp1, tmp1, #2; \ | |
241 | ||
242 | #define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
243 | veor tmp0, tmp0, W; \ | |
244 | ||
245 | #define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
246 | veor W, tmp0, tmp1; \ | |
247 | ||
248 | #define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
249 | vadd.u32 tmp0, W, curK; \ | |
250 | ||
251 | #define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
252 | vst1.32 {tmp0}, [RWK]; | |
253 | ||
254 | ||
255 | /********* Precalc macros for rounds 32-79 ************************************/ | |
256 | ||
/* WPRECALC_32_79_0 .. _9: ten slices that together compute four consecutive schedule words and their W+K values; they are handed to four successive round macros as pre1/pre3 and must run in numeric order. W is both input and output: at the call sites it is the register that still holds the words produced 32 rounds earlier. In sequence they: XOR W with W_m28, with W_m16 and with {upper half of W_m08 : lower half of W_m04}; rotate every lane left by 2 (shl #2 | shr #30); add curK; and store the four sums at sp + WK_offs(i & ~3). The i parameter here receives the round macro's i16 argument. W_m12, W_m20 and W_m24 are not used. Clobbers tmp0, tmp1 and RWK. */
257 | #define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
258 | veor W, W_m28; \ | |
259 | ||
260 | #define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
261 | vext.8 tmp0, W_m08, W_m04, #8; \ | |
262 | ||
263 | #define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
264 | veor W, W_m16; \ | |
265 | ||
266 | #define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
267 | veor W, tmp0; \ | |
268 | ||
269 | #define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
270 | add RWK, sp, #(WK_offs(i&~3)); \ | |
271 | ||
272 | #define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
273 | vshl.u32 tmp1, W, #2; \ | |
274 | ||
275 | #define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
276 | vshr.u32 tmp0, W, #30; \ | |
277 | ||
278 | #define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
279 | vorr W, tmp0, tmp1; \ | |
280 | ||
281 | #define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
282 | vadd.u32 tmp0, W, curK; \ | |
283 | ||
284 | #define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
285 | vst1.32 {tmp0}, [RWK]; | |
286 | ||
287 | ||
288 | /* | |
289 | * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. | |
290 | * | |
291 | * unsigned int | |
292 | * sha1_transform_neon (void *ctx, const unsigned char *data, | |
293 | * unsigned int nblks) | |
 *
 * Structure: the 80 scalar rounds of the current block are interleaved with
 * the NEON computation of W[i]+K for rounds 16 ahead; the sums are kept in a
 * 16-word ring buffer on the stack. When more blocks follow, rounds 64-79
 * also load and prepare words 0-15 of the next block.
 *
 * Returns immediately, touching nothing, when nblks is 0.
 *
 * Registers: r4-r12 and lr are saved and restored; r1, r2 and r3 are
 * modified; q0-q15 are overwritten.
 *
 * NOTE(review): the prototype above says "unsigned int", but no instruction
 * in this routine writes r0, so r0 still holds ctx on return - confirm what
 * callers expect.
 * NOTE(review): q4-q7 are used (W4, W6, W5, W1) while the vpush/vpop that
 * would preserve them are commented out - presumably the caller is
 * responsible for the NEON register state; confirm.
294 | */ | |
295 | .align 3 | |
296 | ENTRY(sha1_transform_neon) | |
297 | /* input: | |
298 | * r0: ctx, CTX | |
299 | * r1: data (64*nblks bytes) | |
300 | * r2: nblks | |
301 | */ | |
302 | ||
303 | cmp RNBLKS, #0; | |
304 | beq .Ldo_nothing; | |
305 | ||
306 | push {r4-r12, lr}; | |
307 | /*vpush {q4-q7};*/ | |
308 | ||
309 | adr RT3, .LK_VEC; | |
310 | ||
311 | mov ROLDSTACK, sp; | |
312 | ||
313 | /* Align stack: reserve 16 words (64 bytes) below the current sp for the W+K ring buffer and round the result down to a 16-byte boundary. The original sp was saved in ROLDSTACK just above. */ | |
314 | sub RT0, sp, #(16*4); | |
315 | and RT0, #(~(16-1)); | |
316 | mov sp, RT0; | |
317 | ||
318 | vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ | |
319 | ||
320 | /* Get the values of the chaining variables. */ | |
321 | ldm RSTATE, {_a-_e}; | |
322 | ||
323 | vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ | |
324 | ||
325 | #undef curK | |
326 | #define curK qK1 | |
327 | /* Precalc 0-15. */ | |
328 | W_PRECALC_00_15(); | |
329 | ||
330 | .Loop: | |
331 | /* Transform 0-15 + Precalc 16-31. Each group of four rounds carries the twelve precalc slices for one vector of four later W+K words. The final slice (the store) is pre3 of the fourth round, i.e. it runs after that round has already loaded its own word from the ring-buffer slots being overwritten. curK always names the constant for the rounds being precomputed, not for the rounds being executed. */ | |
332 | _R( _a, _b, _c, _d, _e, F1, 0, | |
333 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, | |
334 | W4, W5, W6, W7, W0, _, _, _ ); | |
335 | _R( _e, _a, _b, _c, _d, F1, 1, | |
336 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, | |
337 | W4, W5, W6, W7, W0, _, _, _ ); | |
338 | _R( _d, _e, _a, _b, _c, F1, 2, | |
339 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, | |
340 | W4, W5, W6, W7, W0, _, _, _ ); | |
341 | _R( _c, _d, _e, _a, _b, F1, 3, | |
342 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, | |
343 | W4, W5, W6, W7, W0, _, _, _ ); | |
344 | ||
345 | #undef curK | |
346 | #define curK qK2 | |
347 | _R( _b, _c, _d, _e, _a, F1, 4, | |
348 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, | |
349 | W3, W4, W5, W6, W7, _, _, _ ); | |
350 | _R( _a, _b, _c, _d, _e, F1, 5, | |
351 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, | |
352 | W3, W4, W5, W6, W7, _, _, _ ); | |
353 | _R( _e, _a, _b, _c, _d, F1, 6, | |
354 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, | |
355 | W3, W4, W5, W6, W7, _, _, _ ); | |
356 | _R( _d, _e, _a, _b, _c, F1, 7, | |
357 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, | |
358 | W3, W4, W5, W6, W7, _, _, _ ); | |
359 | ||
360 | _R( _c, _d, _e, _a, _b, F1, 8, | |
361 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, | |
362 | W2, W3, W4, W5, W6, _, _, _ ); | |
363 | _R( _b, _c, _d, _e, _a, F1, 9, | |
364 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, | |
365 | W2, W3, W4, W5, W6, _, _, _ ); | |
366 | _R( _a, _b, _c, _d, _e, F1, 10, | |
367 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, | |
368 | W2, W3, W4, W5, W6, _, _, _ ); | |
369 | _R( _e, _a, _b, _c, _d, F1, 11, | |
370 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, | |
371 | W2, W3, W4, W5, W6, _, _, _ ); | |
372 | ||
373 | _R( _d, _e, _a, _b, _c, F1, 12, | |
374 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, | |
375 | W1, W2, W3, W4, W5, _, _, _ ); | |
376 | _R( _c, _d, _e, _a, _b, F1, 13, | |
377 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, | |
378 | W1, W2, W3, W4, W5, _, _, _ ); | |
379 | _R( _b, _c, _d, _e, _a, F1, 14, | |
380 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, | |
381 | W1, W2, W3, W4, W5, _, _, _ ); | |
382 | _R( _a, _b, _c, _d, _e, F1, 15, | |
383 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, | |
384 | W1, W2, W3, W4, W5, _, _, _ ); | |
385 | ||
386 | /* Transform 16-63 + Precalc 32-79. From here on the eight W registers rotate by one position every four rounds, so the vector overwritten is always the one holding the oldest four words. */ | |
387 | _R( _e, _a, _b, _c, _d, F1, 16, | |
388 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, | |
389 | W0, W1, W2, W3, W4, W5, W6, W7); | |
390 | _R( _d, _e, _a, _b, _c, F1, 17, | |
391 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, | |
392 | W0, W1, W2, W3, W4, W5, W6, W7); | |
393 | _R( _c, _d, _e, _a, _b, F1, 18, | |
394 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, | |
395 | W0, W1, W2, W3, W4, W5, W6, W7); | |
396 | _R( _b, _c, _d, _e, _a, F1, 19, | |
397 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, | |
398 | W0, W1, W2, W3, W4, W5, W6, W7); | |
399 | ||
400 | _R( _a, _b, _c, _d, _e, F2, 20, | |
401 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, | |
402 | W7, W0, W1, W2, W3, W4, W5, W6); | |
403 | _R( _e, _a, _b, _c, _d, F2, 21, | |
404 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, | |
405 | W7, W0, W1, W2, W3, W4, W5, W6); | |
406 | _R( _d, _e, _a, _b, _c, F2, 22, | |
407 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, | |
408 | W7, W0, W1, W2, W3, W4, W5, W6); | |
409 | _R( _c, _d, _e, _a, _b, F2, 23, | |
410 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, | |
411 | W7, W0, W1, W2, W3, W4, W5, W6); | |
412 | ||
413 | #undef curK | |
414 | #define curK qK3 | |
415 | _R( _b, _c, _d, _e, _a, F2, 24, | |
416 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, | |
417 | W6, W7, W0, W1, W2, W3, W4, W5); | |
418 | _R( _a, _b, _c, _d, _e, F2, 25, | |
419 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, | |
420 | W6, W7, W0, W1, W2, W3, W4, W5); | |
421 | _R( _e, _a, _b, _c, _d, F2, 26, | |
422 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, | |
423 | W6, W7, W0, W1, W2, W3, W4, W5); | |
424 | _R( _d, _e, _a, _b, _c, F2, 27, | |
425 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, | |
426 | W6, W7, W0, W1, W2, W3, W4, W5); | |
427 | ||
428 | _R( _c, _d, _e, _a, _b, F2, 28, | |
429 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, | |
430 | W5, W6, W7, W0, W1, W2, W3, W4); | |
431 | _R( _b, _c, _d, _e, _a, F2, 29, | |
432 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, | |
433 | W5, W6, W7, W0, W1, W2, W3, W4); | |
434 | _R( _a, _b, _c, _d, _e, F2, 30, | |
435 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, | |
436 | W5, W6, W7, W0, W1, W2, W3, W4); | |
437 | _R( _e, _a, _b, _c, _d, F2, 31, | |
438 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, | |
439 | W5, W6, W7, W0, W1, W2, W3, W4); | |
440 | ||
441 | _R( _d, _e, _a, _b, _c, F2, 32, | |
442 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, | |
443 | W4, W5, W6, W7, W0, W1, W2, W3); | |
444 | _R( _c, _d, _e, _a, _b, F2, 33, | |
445 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, | |
446 | W4, W5, W6, W7, W0, W1, W2, W3); | |
447 | _R( _b, _c, _d, _e, _a, F2, 34, | |
448 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, | |
449 | W4, W5, W6, W7, W0, W1, W2, W3); | |
450 | _R( _a, _b, _c, _d, _e, F2, 35, | |
451 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, | |
452 | W4, W5, W6, W7, W0, W1, W2, W3); | |
453 | ||
454 | _R( _e, _a, _b, _c, _d, F2, 36, | |
455 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, | |
456 | W3, W4, W5, W6, W7, W0, W1, W2); | |
457 | _R( _d, _e, _a, _b, _c, F2, 37, | |
458 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, | |
459 | W3, W4, W5, W6, W7, W0, W1, W2); | |
460 | _R( _c, _d, _e, _a, _b, F2, 38, | |
461 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, | |
462 | W3, W4, W5, W6, W7, W0, W1, W2); | |
463 | _R( _b, _c, _d, _e, _a, F2, 39, | |
464 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, | |
465 | W3, W4, W5, W6, W7, W0, W1, W2); | |
466 | ||
467 | _R( _a, _b, _c, _d, _e, F3, 40, | |
468 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, | |
469 | W2, W3, W4, W5, W6, W7, W0, W1); | |
470 | _R( _e, _a, _b, _c, _d, F3, 41, | |
471 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, | |
472 | W2, W3, W4, W5, W6, W7, W0, W1); | |
473 | _R( _d, _e, _a, _b, _c, F3, 42, | |
474 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, | |
475 | W2, W3, W4, W5, W6, W7, W0, W1); | |
476 | _R( _c, _d, _e, _a, _b, F3, 43, | |
477 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, | |
478 | W2, W3, W4, W5, W6, W7, W0, W1); | |
479 | ||
480 | #undef curK | |
481 | #define curK qK4 | |
482 | _R( _b, _c, _d, _e, _a, F3, 44, | |
483 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, | |
484 | W1, W2, W3, W4, W5, W6, W7, W0); | |
485 | _R( _a, _b, _c, _d, _e, F3, 45, | |
486 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, | |
487 | W1, W2, W3, W4, W5, W6, W7, W0); | |
488 | _R( _e, _a, _b, _c, _d, F3, 46, | |
489 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, | |
490 | W1, W2, W3, W4, W5, W6, W7, W0); | |
491 | _R( _d, _e, _a, _b, _c, F3, 47, | |
492 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, | |
493 | W1, W2, W3, W4, W5, W6, W7, W0); | |
494 | ||
495 | _R( _c, _d, _e, _a, _b, F3, 48, | |
496 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, | |
497 | W0, W1, W2, W3, W4, W5, W6, W7); | |
498 | _R( _b, _c, _d, _e, _a, F3, 49, | |
499 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, | |
500 | W0, W1, W2, W3, W4, W5, W6, W7); | |
501 | _R( _a, _b, _c, _d, _e, F3, 50, | |
502 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, | |
503 | W0, W1, W2, W3, W4, W5, W6, W7); | |
504 | _R( _e, _a, _b, _c, _d, F3, 51, | |
505 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, | |
506 | W0, W1, W2, W3, W4, W5, W6, W7); | |
507 | ||
508 | _R( _d, _e, _a, _b, _c, F3, 52, | |
509 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, | |
510 | W7, W0, W1, W2, W3, W4, W5, W6); | |
511 | _R( _c, _d, _e, _a, _b, F3, 53, | |
512 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, | |
513 | W7, W0, W1, W2, W3, W4, W5, W6); | |
514 | _R( _b, _c, _d, _e, _a, F3, 54, | |
515 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, | |
516 | W7, W0, W1, W2, W3, W4, W5, W6); | |
517 | _R( _a, _b, _c, _d, _e, F3, 55, | |
518 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, | |
519 | W7, W0, W1, W2, W3, W4, W5, W6); | |
520 | ||
521 | _R( _e, _a, _b, _c, _d, F3, 56, | |
522 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, | |
523 | W6, W7, W0, W1, W2, W3, W4, W5); | |
524 | _R( _d, _e, _a, _b, _c, F3, 57, | |
525 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, | |
526 | W6, W7, W0, W1, W2, W3, W4, W5); | |
527 | _R( _c, _d, _e, _a, _b, F3, 58, | |
528 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, | |
529 | W6, W7, W0, W1, W2, W3, W4, W5); | |
530 | _R( _b, _c, _d, _e, _a, F3, 59, | |
531 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, | |
532 | W6, W7, W0, W1, W2, W3, W4, W5); | |
533 | ||
/* The block counter is decremented here, ahead of use: the Z flag it sets is consumed by the "beq .Lend" after round 63. Rounds 60-63 in between contain no flag-setting (s-suffixed) instructions. */
534 | subs RNBLKS, #1; | |
535 | ||
536 | _R( _a, _b, _c, _d, _e, F4, 60, | |
537 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, | |
538 | W5, W6, W7, W0, W1, W2, W3, W4); | |
539 | _R( _e, _a, _b, _c, _d, F4, 61, | |
540 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, | |
541 | W5, W6, W7, W0, W1, W2, W3, W4); | |
542 | _R( _d, _e, _a, _b, _c, F4, 62, | |
543 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, | |
544 | W5, W6, W7, W0, W1, W2, W3, W4); | |
545 | _R( _c, _d, _e, _a, _b, F4, 63, | |
546 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, | |
547 | W5, W6, W7, W0, W1, W2, W3, W4); | |
548 | ||
549 | beq .Lend; | |
550 | ||
551 | /* Transform 64-79 + Precalc 0-15 of next block. Taken only when at least one more block remains; the last block finishes at .Lend instead, where no further input is read. */ | |
552 | #undef curK | |
553 | #define curK qK1 | |
554 | _R( _b, _c, _d, _e, _a, F4, 64, | |
555 | WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
556 | _R( _a, _b, _c, _d, _e, F4, 65, | |
557 | WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
558 | _R( _e, _a, _b, _c, _d, F4, 66, | |
559 | WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
560 | _R( _d, _e, _a, _b, _c, F4, 67, | |
561 | WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
562 | ||
563 | _R( _c, _d, _e, _a, _b, F4, 68, | |
564 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
565 | _R( _b, _c, _d, _e, _a, F4, 69, | |
566 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
567 | _R( _a, _b, _c, _d, _e, F4, 70, | |
568 | WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
569 | _R( _e, _a, _b, _c, _d, F4, 71, | |
570 | WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
571 | ||
572 | _R( _d, _e, _a, _b, _c, F4, 72, | |
573 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
574 | _R( _c, _d, _e, _a, _b, F4, 73, | |
575 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
576 | _R( _b, _c, _d, _e, _a, F4, 74, | |
577 | WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
578 | _R( _a, _b, _c, _d, _e, F4, 75, | |
579 | WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
580 | ||
581 | _R( _e, _a, _b, _c, _d, F4, 76, | |
582 | WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
583 | _R( _d, _e, _a, _b, _c, F4, 77, | |
584 | WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
585 | _R( _c, _d, _e, _a, _b, F4, 78, | |
586 | WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
587 | _R( _b, _c, _d, _e, _a, F4, 79, | |
588 | WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); | |
589 | ||
590 | /* Update the chaining variables: add the stored h0..h4 to the working variables and write the sums back; _a.._e then carry the new state straight into the next iteration. RT0 is reused for h4 after h0 has been consumed. */ | |
591 | ldm RSTATE, {RT0-RT3}; | |
592 | add _a, RT0; | |
593 | ldr RT0, [RSTATE, #state_h4]; | |
594 | add _b, RT1; | |
595 | add _c, RT2; | |
596 | add _d, RT3; | |
597 | add _e, RT0; | |
598 | stm RSTATE, {_a-_e}; | |
599 | ||
600 | b .Loop; | |
601 | ||
602 | .Lend: | |
603 | /* Transform 64-79 of the final block, with no interleaved precalc. */ | |
604 | R( _b, _c, _d, _e, _a, F4, 64 ); | |
605 | R( _a, _b, _c, _d, _e, F4, 65 ); | |
606 | R( _e, _a, _b, _c, _d, F4, 66 ); | |
607 | R( _d, _e, _a, _b, _c, F4, 67 ); | |
608 | R( _c, _d, _e, _a, _b, F4, 68 ); | |
609 | R( _b, _c, _d, _e, _a, F4, 69 ); | |
610 | R( _a, _b, _c, _d, _e, F4, 70 ); | |
611 | R( _e, _a, _b, _c, _d, F4, 71 ); | |
612 | R( _d, _e, _a, _b, _c, F4, 72 ); | |
613 | R( _c, _d, _e, _a, _b, F4, 73 ); | |
614 | R( _b, _c, _d, _e, _a, F4, 74 ); | |
615 | R( _a, _b, _c, _d, _e, F4, 75 ); | |
616 | R( _e, _a, _b, _c, _d, F4, 76 ); | |
617 | R( _d, _e, _a, _b, _c, F4, 77 ); | |
618 | R( _c, _d, _e, _a, _b, F4, 78 ); | |
619 | R( _b, _c, _d, _e, _a, F4, 79 ); | |
620 | ||
621 | mov sp, ROLDSTACK; | |
622 | ||
623 | /* Update the chaining variables. */ | |
624 | ldm RSTATE, {RT0-RT3}; | |
625 | add _a, RT0; | |
626 | ldr RT0, [RSTATE, #state_h4]; | |
627 | add _b, RT1; | |
628 | add _c, RT2; | |
629 | add _d, RT3; | |
630 | /*vpop {q4-q7};*/ | |
631 | add _e, RT0; | |
632 | stm RSTATE, {_a-_e}; | |
633 | ||
634 | pop {r4-r12, pc}; | |
635 | ||
636 | .Ldo_nothing: | |
637 | bx lr | |
638 | ENDPROC(sha1_transform_neon) |