1 | #!/usr/bin/env perl |
2 | # | |
3 | # ==================================================================== | |
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |
5 | # project. The module is, however, dual licensed under OpenSSL and | |
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 | # details see http://www.openssl.org/~appro/cryptogams/. | |
8 | # ==================================================================== | |
9 | # | |
10 | # This module implements support for the AES instructions introduced in |
11 | # PowerISA specification version 2.07 and first implemented by the POWER8 |
12 | # processor. The module is endian-agnostic in the sense that it supports |
13 | # both big- and little-endian targets. Data alignment in parallelizable |
14 | # modes is handled with VSX loads and stores, which implies that the |
15 | # MSR.VSX flag must be set. Note also that the ISA specification doesn't |
16 | # prohibit alignment exceptions for these instructions on page boundaries. |
17 | # Initially alignment was handled in a pure AltiVec/VMX way [with data |
18 | # aligned programmatically, which in turn guarantees exception-free |
19 | # execution], but that turned out to hamper performance when vcipher |
20 | # instructions are interleaved. The reckoning is that the occasional |
21 | # misalignment penalty at a page boundary is on average lower than the |
22 | # additional overhead of the pure AltiVec approach. |
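#
# For reference, the routines generated below are normally declared on
# the C side roughly as follows. This is only a sketch: it assumes the
# usual OpenSSL-style AES_KEY layout, i.e. the round keys followed by a
# 32-bit "rounds" field at offset 240 (the offset the code below reads
# with lwz ...,240(key)); exact typedefs are up to the caller.
#
#	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
#	                            const int bits, AES_KEY *key);
#	int  aes_p8_set_decrypt_key(const unsigned char *userKey,
#	                            const int bits, AES_KEY *key);
#	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	                        size_t length, const AES_KEY *key,
#	                        unsigned char *ivec, const int enc);
#	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
#	                                 unsigned char *out, size_t blocks,
#	                                 const AES_KEY *key,
#	                                 const unsigned char *ivec);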
23 | ||
24 | $flavour = shift; | |
25 | ||
26 | if ($flavour =~ /64/) { | |
27 | $SIZE_T =8; | |
28 | $LRSAVE =2*$SIZE_T; | |
29 | $STU ="stdu"; | |
30 | $POP ="ld"; | |
31 | $PUSH ="std"; | |
32 | $UCMP ="cmpld"; | |
33 | $SHL ="sldi"; | |
34 | } elsif ($flavour =~ /32/) { | |
35 | $SIZE_T =4; | |
36 | $LRSAVE =$SIZE_T; | |
37 | $STU ="stwu"; | |
38 | $POP ="lwz"; | |
39 | $PUSH ="stw"; | |
40 | $UCMP ="cmplw"; | |
41 | $SHL ="slwi"; | |
42 | } else { die "nonsense $flavour"; } | |
43 | ||
44 | $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; | |
45 | ||
46 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
47 | ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or | |
48 | ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or | |
49 | die "can't locate ppc-xlate.pl"; | |
50 | ||
51 | open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; |
52 | ||
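# The first argument selects the perlasm flavour (e.g. linux-ppc64 or
# linux-ppc64le), the second names the output file; a typical invocation
# would look something like
#
#	perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.s
#
# though the exact flavour names and file names depend on the build
# system driving ppc-xlate.pl.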
53 | $FRAME=8*$SIZE_T; | |
54 | $prefix="aes_p8"; | |
55 | ||
56 | $sp="r1"; | |
57 | $vrsave="r12"; | |
58 | ||
59 | ######################################################################### | |
60 | {{{ # Key setup procedures # | |
61 | my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); | |
62 | my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); | |
63 | my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); | |
64 | ||
65 | $code.=<<___; | |
66 | .machine "any" | |
67 | ||
68 | .text | |
69 | ||
70 | .align 7 | |
71 | rcon: | |
72 | .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev | |
73 | .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev | |
74 | .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev | |
75 | .long 0,0,0,0 ?asis | |
76 | Lconsts: | |
77 | mflr r0 | |
78 | bcl 20,31,\$+4 | |
79 | mflr $ptr #vvvvv distance between . and rcon |
80 | addi $ptr,$ptr,-0x48 | |
81 | mtlr r0 | |
82 | blr | |
83 | .long 0 | |
84 | .byte 0,12,0x14,0,0,0,0,0 | |
85 | .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" | |
86 | ||
87 | .globl .${prefix}_set_encrypt_key | |
88 | Lset_encrypt_key: |
89 | mflr r11 | |
90 | $PUSH r11,$LRSAVE($sp) | |
91 | ||
92 | li $ptr,-1 | |
93 | ${UCMP}i $inp,0 | |
94 | beq- Lenc_key_abort # if ($inp==0) return -1; | |
95 | ${UCMP}i $out,0 | |
96 | beq- Lenc_key_abort # if ($out==0) return -1; | |
97 | li $ptr,-2 | |
98 | cmpwi $bits,128 | |
99 | blt- Lenc_key_abort | |
100 | cmpwi $bits,256 | |
101 | bgt- Lenc_key_abort | |
102 | andi. r0,$bits,0x3f | |
103 | bne- Lenc_key_abort | |
104 | ||
105 | lis r0,0xfff0 | |
106 | mfspr $vrsave,256 | |
107 | mtspr 256,r0 | |
108 | ||
109 | bl Lconsts | |
110 | mtlr r11 | |
111 | ||
112 | neg r9,$inp | |
113 | lvx $in0,0,$inp | |
114 | addi $inp,$inp,15 # 15 is not typo | |
115 | lvsr $key,0,r9 # borrow $key | |
116 | li r8,0x20 | |
117 | cmpwi $bits,192 | |
118 | lvx $in1,0,$inp | |
119 | le?vspltisb $mask,0x0f # borrow $mask | |
120 | lvx $rcon,0,$ptr | |
121 | le?vxor $key,$key,$mask # adjust for byte swap | |
122 | lvx $mask,r8,$ptr | |
123 | addi $ptr,$ptr,0x10 | |
124 | vperm $in0,$in0,$in1,$key # align [and byte swap in LE] | |
125 | li $cnt,8 | |
126 | vxor $zero,$zero,$zero | |
127 | mtctr $cnt | |
128 | ||
129 | ?lvsr $outperm,0,$out | |
130 | vspltisb $outmask,-1 | |
131 | lvx $outhead,0,$out | |
132 | ?vperm $outmask,$zero,$outmask,$outperm | |
133 | ||
134 | blt Loop128 | |
135 | addi $inp,$inp,8 | |
136 | beq L192 | |
137 | addi $inp,$inp,8 | |
138 | b L256 | |
139 | ||
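# Roughly, in FIPS-197 terms, each pass of Loop128 derives the next
# 128-bit round key from the previous one: the vperm rotates-and-splats
# the last word, vcipherlast supplies SubBytes/ShiftRows and xors in the
# round constant (leaving SubWord(RotWord(w3)) ^ rcon in every lane),
# and the vsldoi/vxor chain folds in the running xor of the previous
# key words to produce the next four words of the schedule.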
140 | .align 4 | |
141 | Loop128: | |
142 | vperm $key,$in0,$in0,$mask # rotate-n-splat | |
143 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
144 | vperm $outtail,$in0,$in0,$outperm # rotate | |
145 | vsel $stage,$outhead,$outtail,$outmask | |
146 | vmr $outhead,$outtail | |
147 | vcipherlast $key,$key,$rcon | |
148 | stvx $stage,0,$out | |
149 | addi $out,$out,16 | |
150 | ||
151 | vxor $in0,$in0,$tmp | |
152 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
153 | vxor $in0,$in0,$tmp | |
154 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
155 | vxor $in0,$in0,$tmp | |
156 | vadduwm $rcon,$rcon,$rcon | |
157 | vxor $in0,$in0,$key | |
158 | bdnz Loop128 | |
159 | ||
160 | lvx $rcon,0,$ptr # last two round keys | |
161 | ||
162 | vperm $key,$in0,$in0,$mask # rotate-n-splat | |
163 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
164 | vperm $outtail,$in0,$in0,$outperm # rotate | |
165 | vsel $stage,$outhead,$outtail,$outmask | |
166 | vmr $outhead,$outtail | |
167 | vcipherlast $key,$key,$rcon | |
168 | stvx $stage,0,$out | |
169 | addi $out,$out,16 | |
170 | ||
171 | vxor $in0,$in0,$tmp | |
172 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
173 | vxor $in0,$in0,$tmp | |
174 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
175 | vxor $in0,$in0,$tmp | |
176 | vadduwm $rcon,$rcon,$rcon | |
177 | vxor $in0,$in0,$key | |
178 | ||
179 | vperm $key,$in0,$in0,$mask # rotate-n-splat | |
180 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
181 | vperm $outtail,$in0,$in0,$outperm # rotate | |
182 | vsel $stage,$outhead,$outtail,$outmask | |
183 | vmr $outhead,$outtail | |
184 | vcipherlast $key,$key,$rcon | |
185 | stvx $stage,0,$out | |
186 | addi $out,$out,16 | |
187 | ||
188 | vxor $in0,$in0,$tmp | |
189 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
190 | vxor $in0,$in0,$tmp | |
191 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
192 | vxor $in0,$in0,$tmp | |
193 | vxor $in0,$in0,$key | |
194 | vperm $outtail,$in0,$in0,$outperm # rotate | |
195 | vsel $stage,$outhead,$outtail,$outmask | |
196 | vmr $outhead,$outtail | |
197 | stvx $stage,0,$out | |
198 | ||
199 | addi $inp,$out,15 # 15 is not typo | |
200 | addi $out,$out,0x50 | |
201 | ||
202 | li $rounds,10 | |
203 | b Ldone | |
204 | ||
205 | .align 4 | |
206 | L192: | |
207 | lvx $tmp,0,$inp | |
208 | li $cnt,4 | |
209 | vperm $outtail,$in0,$in0,$outperm # rotate | |
210 | vsel $stage,$outhead,$outtail,$outmask | |
211 | vmr $outhead,$outtail | |
212 | stvx $stage,0,$out | |
213 | addi $out,$out,16 | |
214 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | |
215 | vspltisb $key,8 # borrow $key | |
216 | mtctr $cnt | |
217 | vsububm $mask,$mask,$key # adjust the mask | |
218 | ||
219 | Loop192: | |
220 | vperm $key,$in1,$in1,$mask # rotate-n-splat |
221 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
222 | vcipherlast $key,$key,$rcon | |
223 | ||
224 | vxor $in0,$in0,$tmp | |
225 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
226 | vxor $in0,$in0,$tmp | |
227 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
228 | vxor $in0,$in0,$tmp | |
229 | ||
230 | vsldoi $stage,$zero,$in1,8 | |
231 | vspltw $tmp,$in0,3 | |
232 | vxor $tmp,$tmp,$in1 | |
233 | vsldoi $in1,$zero,$in1,12 # >>32 | |
234 | vadduwm $rcon,$rcon,$rcon | |
235 | vxor $in1,$in1,$tmp | |
236 | vxor $in0,$in0,$key | |
237 | vxor $in1,$in1,$key | |
238 | vsldoi $stage,$stage,$in0,8 | |
239 | ||
240 | vperm $key,$in1,$in1,$mask # rotate-n-splat | |
241 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
242 | vperm $outtail,$stage,$stage,$outperm # rotate | |
243 | vsel $stage,$outhead,$outtail,$outmask | |
244 | vmr $outhead,$outtail | |
245 | vcipherlast $key,$key,$rcon | |
246 | stvx $stage,0,$out | |
247 | addi $out,$out,16 | |
248 | ||
249 | vsldoi $stage,$in0,$in1,8 | |
250 | vxor $in0,$in0,$tmp | |
251 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
252 | vperm $outtail,$stage,$stage,$outperm # rotate | |
253 | vsel $stage,$outhead,$outtail,$outmask | |
254 | vmr $outhead,$outtail | |
255 | vxor $in0,$in0,$tmp | |
256 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
257 | vxor $in0,$in0,$tmp | |
258 | stvx $stage,0,$out | |
259 | addi $out,$out,16 | |
260 | ||
261 | vspltw $tmp,$in0,3 | |
262 | vxor $tmp,$tmp,$in1 | |
263 | vsldoi $in1,$zero,$in1,12 # >>32 | |
264 | vadduwm $rcon,$rcon,$rcon | |
265 | vxor $in1,$in1,$tmp | |
266 | vxor $in0,$in0,$key | |
267 | vxor $in1,$in1,$key | |
268 | vperm $outtail,$in0,$in0,$outperm # rotate | |
269 | vsel $stage,$outhead,$outtail,$outmask | |
270 | vmr $outhead,$outtail | |
271 | stvx $stage,0,$out | |
272 | addi $inp,$out,15 # 15 is not typo | |
273 | addi $out,$out,16 | |
274 | bdnz Loop192 | |
275 | ||
276 | li $rounds,12 | |
277 | addi $out,$out,0x20 | |
278 | b Ldone | |
279 | ||
280 | .align 4 | |
281 | L256: | |
282 | lvx $tmp,0,$inp | |
283 | li $cnt,7 | |
284 | li $rounds,14 | |
285 | vperm $outtail,$in0,$in0,$outperm # rotate | |
286 | vsel $stage,$outhead,$outtail,$outmask | |
287 | vmr $outhead,$outtail | |
288 | stvx $stage,0,$out | |
289 | addi $out,$out,16 | |
290 | vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] | |
291 | mtctr $cnt | |
292 | ||
293 | Loop256: | |
294 | vperm $key,$in1,$in1,$mask # rotate-n-splat | |
295 | vsldoi $tmp,$zero,$in0,12 # >>32 | |
296 | vperm $outtail,$in1,$in1,$outperm # rotate | |
297 | vsel $stage,$outhead,$outtail,$outmask | |
298 | vmr $outhead,$outtail | |
299 | vcipherlast $key,$key,$rcon | |
300 | stvx $stage,0,$out | |
301 | addi $out,$out,16 | |
302 | ||
303 | vxor $in0,$in0,$tmp | |
304 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
305 | vxor $in0,$in0,$tmp | |
306 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
307 | vxor $in0,$in0,$tmp | |
308 | vadduwm $rcon,$rcon,$rcon | |
309 | vxor $in0,$in0,$key | |
310 | vperm $outtail,$in0,$in0,$outperm # rotate | |
311 | vsel $stage,$outhead,$outtail,$outmask | |
312 | vmr $outhead,$outtail | |
313 | stvx $stage,0,$out | |
314 | addi $inp,$out,15 # 15 is not typo | |
315 | addi $out,$out,16 | |
316 | bdz Ldone | |
317 | ||
318 | vspltw $key,$in0,3 # just splat | |
319 | vsldoi $tmp,$zero,$in1,12 # >>32 | |
320 | vsbox $key,$key | |
321 | ||
322 | vxor $in1,$in1,$tmp | |
323 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
324 | vxor $in1,$in1,$tmp | |
325 | vsldoi $tmp,$zero,$tmp,12 # >>32 | |
326 | vxor $in1,$in1,$tmp | |
327 | ||
328 | vxor $in1,$in1,$key | |
329 | b Loop256 | |
330 | ||
331 | .align 4 | |
332 | Ldone: | |
333 | lvx $in1,0,$inp # redundant in aligned case | |
334 | vsel $in1,$outhead,$in1,$outmask | |
335 | stvx $in1,0,$inp | |
336 | li $ptr,0 | |
337 | mtspr 256,$vrsave | |
338 | stw $rounds,0($out) | |
339 | ||
340 | Lenc_key_abort: | |
341 | mr r3,$ptr | |
342 | blr | |
343 | .long 0 | |
344 | .byte 0,12,0x14,1,0,0,3,0 | |
345 | .long 0 | |
346 | .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key | |
347 | ||
348 | .globl .${prefix}_set_decrypt_key | |
349 | $STU $sp,-$FRAME($sp) |
350 | mflr r10 | |
351 | $PUSH r10,$FRAME+$LRSAVE($sp) | |
352 | bl Lset_encrypt_key | |
353 | mtlr r10 | |
354 | ||
355 | cmpwi r3,0 | |
356 | bne- Ldec_key_abort | |
357 | ||
358 | slwi $cnt,$rounds,4 | |
359 | subi $inp,$out,240 # first round key | |
360 | srwi $rounds,$rounds,1 | |
361 | add $out,$inp,$cnt # last round key | |
362 | mtctr $rounds | |
363 | ||
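# The decrypt schedule is the encrypt schedule with the order of the
# round keys reversed in place: every pass of Ldeckey swaps one 16-byte
# round key from the front of the schedule with its counterpart at the
# back, so the decrypt routines can walk the key array forwards just
# like the encrypt ones.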
364 | Ldeckey: | |
365 | lwz r0, 0($inp) | |
366 | lwz r6, 4($inp) | |
367 | lwz r7, 8($inp) | |
368 | lwz r8, 12($inp) | |
369 | addi $inp,$inp,16 | |
370 | lwz r9, 0($out) | |
371 | lwz r10,4($out) | |
372 | lwz r11,8($out) | |
373 | lwz r12,12($out) | |
374 | stw r0, 0($out) | |
375 | stw r6, 4($out) | |
376 | stw r7, 8($out) | |
377 | stw r8, 12($out) | |
378 | subi $out,$out,16 | |
379 | stw r9, -16($inp) | |
380 | stw r10,-12($inp) | |
381 | stw r11,-8($inp) | |
382 | stw r12,-4($inp) | |
383 | bdnz Ldeckey | |
384 | ||
385 | xor r3,r3,r3 # return value | |
386 | Ldec_key_abort: | |
387 | addi $sp,$sp,$FRAME | |
388 | blr | |
389 | .long 0 | |
390 | .byte 0,12,4,1,0x80,0,3,0 | |
391 | .long 0 | |
392 | .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key | |
393 | ___ | |
394 | }}} | |
395 | ######################################################################### | |
396 | {{{ # Single block en- and decrypt procedures # | |
397 | sub gen_block () { | |
398 | my $dir = shift; | |
399 | my $n = $dir eq "de" ? "n" : ""; | |
400 | my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); | |
401 | ||
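# One template emits both single-block routines: $n is "" for the "en"
# variant and "n" for "de", so v${n}cipher/v${n}cipherlast below expand
# to vcipher/vcipherlast and vncipher/vncipherlast respectively.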
402 | $code.=<<___; | |
403 | .globl .${prefix}_${dir}crypt | |
404 | lwz $rounds,240($key) |
405 | lis r0,0xfc00 | |
406 | mfspr $vrsave,256 | |
407 | li $idx,15 # 15 is not typo | |
408 | mtspr 256,r0 | |
409 | ||
410 | lvx v0,0,$inp | |
411 | neg r11,$out | |
412 | lvx v1,$idx,$inp | |
413 | lvsl v2,0,$inp # inpperm | |
414 | le?vspltisb v4,0x0f | |
415 | ?lvsl v3,0,r11 # outperm | |
416 | le?vxor v2,v2,v4 | |
417 | li $idx,16 | |
418 | vperm v0,v0,v1,v2 # align [and byte swap in LE] | |
419 | lvx v1,0,$key | |
420 | ?lvsl v5,0,$key # keyperm | |
421 | srwi $rounds,$rounds,1 | |
422 | lvx v2,$idx,$key | |
423 | addi $idx,$idx,16 | |
424 | subi $rounds,$rounds,1 | |
425 | ?vperm v1,v1,v2,v5 # align round key | |
426 | ||
427 | vxor v0,v0,v1 | |
428 | lvx v1,$idx,$key | |
429 | addi $idx,$idx,16 | |
430 | mtctr $rounds | |
431 | ||
432 | Loop_${dir}c: | |
433 | ?vperm v2,v2,v1,v5 | |
434 | v${n}cipher v0,v0,v2 | |
435 | lvx v2,$idx,$key | |
436 | addi $idx,$idx,16 | |
437 | ?vperm v1,v1,v2,v5 | |
438 | v${n}cipher v0,v0,v1 | |
439 | lvx v1,$idx,$key | |
440 | addi $idx,$idx,16 | |
441 | bdnz Loop_${dir}c | |
442 | ||
443 | ?vperm v2,v2,v1,v5 | |
444 | v${n}cipher v0,v0,v2 | |
445 | lvx v2,$idx,$key | |
446 | ?vperm v1,v1,v2,v5 | |
447 | v${n}cipherlast v0,v0,v1 | |
448 | ||
449 | vspltisb v2,-1 | |
450 | vxor v1,v1,v1 | |
451 | li $idx,15 # 15 is not typo | |
452 | ?vperm v2,v1,v2,v3 # outmask | |
453 | le?vxor v3,v3,v4 | |
454 | lvx v1,0,$out # outhead | |
455 | vperm v0,v0,v0,v3 # rotate [and byte swap in LE] | |
456 | vsel v1,v1,v0,v2 | |
457 | lvx v4,$idx,$out | |
458 | stvx v1,0,$out | |
459 | vsel v0,v0,v4,v2 | |
460 | stvx v0,$idx,$out | |
461 | ||
462 | mtspr 256,$vrsave | |
463 | blr | |
464 | .long 0 | |
465 | .byte 0,12,0x14,0,0,0,3,0 | |
466 | .long 0 | |
467 | .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt | |
468 | ___ | |
469 | } | |
470 | &gen_block("en"); | |
471 | &gen_block("de"); | |
472 | }}} | |
473 | ######################################################################### | |
474 | {{{ # CBC en- and decrypt procedures # | |
475 | my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); | |
476 | my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); | |
477 | my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= | |
478 | map("v$_",(4..10)); | |
479 | $code.=<<___; | |
480 | .globl .${prefix}_cbc_encrypt | |
481 | ${UCMP}i $len,16 |
482 | bltlr- | |
483 | ||
484 | cmpwi $enc,0 # test direction | |
485 | lis r0,0xffe0 | |
486 | mfspr $vrsave,256 | |
487 | mtspr 256,r0 | |
488 | ||
489 | li $idx,15 | |
490 | vxor $rndkey0,$rndkey0,$rndkey0 | |
491 | le?vspltisb $tmp,0x0f | |
492 | ||
493 | lvx $ivec,0,$ivp # load [unaligned] iv | |
494 | lvsl $inpperm,0,$ivp | |
495 | lvx $inptail,$idx,$ivp | |
496 | le?vxor $inpperm,$inpperm,$tmp | |
497 | vperm $ivec,$ivec,$inptail,$inpperm | |
498 | ||
499 | neg r11,$inp | |
500 | ?lvsl $keyperm,0,$key # prepare for unaligned key | |
501 | lwz $rounds,240($key) | |
502 | ||
503 | lvsr $inpperm,0,r11 # prepare for unaligned load | |
504 | lvx $inptail,0,$inp | |
505 | addi $inp,$inp,15 # 15 is not typo | |
506 | le?vxor $inpperm,$inpperm,$tmp | |
507 | ||
508 | ?lvsr $outperm,0,$out # prepare for unaligned store | |
509 | vspltisb $outmask,-1 | |
510 | lvx $outhead,0,$out | |
511 | ?vperm $outmask,$rndkey0,$outmask,$outperm | |
512 | le?vxor $outperm,$outperm,$tmp | |
513 | ||
514 | srwi $rounds,$rounds,1 | |
515 | li $idx,16 | |
516 | subi $rounds,$rounds,1 | |
517 | beq Lcbc_dec | |
518 | ||
519 | Lcbc_enc: | |
520 | vmr $inout,$inptail | |
521 | lvx $inptail,0,$inp | |
522 | addi $inp,$inp,16 | |
523 | mtctr $rounds | |
524 | subi $len,$len,16 # len-=16 | |
525 | ||
526 | lvx $rndkey0,0,$key | |
527 | vperm $inout,$inout,$inptail,$inpperm | |
528 | lvx $rndkey1,$idx,$key | |
529 | addi $idx,$idx,16 | |
530 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
531 | vxor $inout,$inout,$rndkey0 | |
532 | lvx $rndkey0,$idx,$key | |
533 | addi $idx,$idx,16 | |
534 | vxor $inout,$inout,$ivec | |
535 | ||
536 | Loop_cbc_enc: | |
537 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
538 | vcipher $inout,$inout,$rndkey1 | |
539 | lvx $rndkey1,$idx,$key | |
540 | addi $idx,$idx,16 | |
541 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
542 | vcipher $inout,$inout,$rndkey0 | |
543 | lvx $rndkey0,$idx,$key | |
544 | addi $idx,$idx,16 | |
545 | bdnz Loop_cbc_enc | |
546 | ||
547 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
548 | vcipher $inout,$inout,$rndkey1 | |
549 | lvx $rndkey1,$idx,$key | |
550 | li $idx,16 | |
551 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
552 | vcipherlast $ivec,$inout,$rndkey0 | |
553 | ${UCMP}i $len,16 | |
554 | ||
555 | vperm $tmp,$ivec,$ivec,$outperm | |
556 | vsel $inout,$outhead,$tmp,$outmask | |
557 | vmr $outhead,$tmp | |
558 | stvx $inout,0,$out | |
559 | addi $out,$out,16 | |
560 | bge Lcbc_enc | |
561 | ||
562 | b Lcbc_done | |
563 | ||
564 | .align 4 | |
565 | Lcbc_dec: | |
566 | ${UCMP}i $len,128 | |
567 | bge _aesp8_cbc_decrypt8x | |
568 | vmr $tmp,$inptail | |
569 | lvx $inptail,0,$inp | |
570 | addi $inp,$inp,16 | |
571 | mtctr $rounds | |
572 | subi $len,$len,16 # len-=16 | |
573 | ||
574 | lvx $rndkey0,0,$key | |
575 | vperm $tmp,$tmp,$inptail,$inpperm | |
576 | lvx $rndkey1,$idx,$key | |
577 | addi $idx,$idx,16 | |
578 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
579 | vxor $inout,$tmp,$rndkey0 | |
580 | lvx $rndkey0,$idx,$key | |
581 | addi $idx,$idx,16 | |
582 | ||
583 | Loop_cbc_dec: | |
584 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
585 | vncipher $inout,$inout,$rndkey1 | |
586 | lvx $rndkey1,$idx,$key | |
587 | addi $idx,$idx,16 | |
588 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
589 | vncipher $inout,$inout,$rndkey0 | |
590 | lvx $rndkey0,$idx,$key | |
591 | addi $idx,$idx,16 | |
592 | bdnz Loop_cbc_dec | |
593 | ||
594 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
595 | vncipher $inout,$inout,$rndkey1 | |
596 | lvx $rndkey1,$idx,$key | |
597 | li $idx,16 | |
598 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
599 | vncipherlast $inout,$inout,$rndkey0 | |
600 | ${UCMP}i $len,16 | |
601 | ||
602 | vxor $inout,$inout,$ivec | |
603 | vmr $ivec,$tmp | |
604 | vperm $tmp,$inout,$inout,$outperm | |
605 | vsel $inout,$outhead,$tmp,$outmask | |
606 | vmr $outhead,$tmp | |
607 | stvx $inout,0,$out | |
608 | addi $out,$out,16 | |
609 | bge Lcbc_dec | |
610 | ||
611 | Lcbc_done: | |
612 | addi $out,$out,-1 | |
613 | lvx $inout,0,$out # redundant in aligned case | |
614 | vsel $inout,$outhead,$inout,$outmask | |
615 | stvx $inout,0,$out | |
616 | ||
617 | neg $enc,$ivp # write [unaligned] iv | |
618 | li $idx,15 # 15 is not typo | |
619 | vxor $rndkey0,$rndkey0,$rndkey0 | |
620 | vspltisb $outmask,-1 | |
621 | le?vspltisb $tmp,0x0f | |
622 | ?lvsl $outperm,0,$enc | |
623 | ?vperm $outmask,$rndkey0,$outmask,$outperm | |
624 | le?vxor $outperm,$outperm,$tmp | |
625 | lvx $outhead,0,$ivp | |
626 | vperm $ivec,$ivec,$ivec,$outperm | |
627 | vsel $inout,$outhead,$ivec,$outmask | |
628 | lvx $inptail,$idx,$ivp | |
629 | stvx $inout,0,$ivp | |
630 | vsel $inout,$ivec,$inptail,$outmask | |
631 | stvx $inout,$idx,$ivp | |
632 | ||
633 | mtspr 256,$vrsave | |
634 | blr | |
635 | .long 0 | |
636 | .byte 0,12,0x14,0,0,0,6,0 | |
637 | .long 0 | |
638 | ___ | |
639 | ######################################################################### | |
640 | {{ # Optimized CBC decrypt procedure # | |
641 | my $key_="r11"; | |
642 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); | |
643 | my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); | |
644 | my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); | |
645 | my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys |
646 | # v26-v31 last 6 round keys | |
647 | my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment | |
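# Key staging scheme used below: all but the last six round keys are
# pre-permuted and parked in the stack frame, then streamed through the
# rotating v24/v25 pair two per loop iteration, while v26-v31 keep the
# final six round keys resident; v31, the last round key, is xored into
# the IV/ciphertext operands ahead of vncipherlast.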
648 | ||
649 | $code.=<<___; | |
650 | .align 5 | |
651 | _aesp8_cbc_decrypt8x: | |
652 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | |
653 | li r10,`$FRAME+8*16+15` | |
654 | li r11,`$FRAME+8*16+31` | |
655 | stvx v20,r10,$sp # ABI says so | |
656 | addi r10,r10,32 | |
657 | stvx v21,r11,$sp | |
658 | addi r11,r11,32 | |
659 | stvx v22,r10,$sp | |
660 | addi r10,r10,32 | |
661 | stvx v23,r11,$sp | |
662 | addi r11,r11,32 | |
663 | stvx v24,r10,$sp | |
664 | addi r10,r10,32 | |
665 | stvx v25,r11,$sp | |
666 | addi r11,r11,32 | |
667 | stvx v26,r10,$sp | |
668 | addi r10,r10,32 | |
669 | stvx v27,r11,$sp | |
670 | addi r11,r11,32 | |
671 | stvx v28,r10,$sp | |
672 | addi r10,r10,32 | |
673 | stvx v29,r11,$sp | |
674 | addi r11,r11,32 | |
675 | stvx v30,r10,$sp | |
676 | stvx v31,r11,$sp | |
677 | li r0,-1 | |
678 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | |
679 | li $x10,0x10 | |
680 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | |
681 | li $x20,0x20 | |
682 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | |
683 | li $x30,0x30 | |
684 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | |
685 | li $x40,0x40 | |
686 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | |
687 | li $x50,0x50 | |
688 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | |
689 | li $x60,0x60 | |
690 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | |
691 | li $x70,0x70 | |
692 | mtspr 256,r0 | |
693 | ||
694 | subi $rounds,$rounds,3 # -4 in total | |
695 | subi $len,$len,128 # bias | |
696 | ||
697 | lvx $rndkey0,$x00,$key # load key schedule | |
698 | lvx v30,$x10,$key | |
699 | addi $key,$key,0x20 | |
700 | lvx v31,$x00,$key | |
701 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | |
702 | addi $key_,$sp,$FRAME+15 | |
703 | mtctr $rounds | |
704 | ||
705 | Load_cbc_dec_key: | |
706 | ?vperm v24,v30,v31,$keyperm | |
707 | lvx v30,$x10,$key | |
708 | addi $key,$key,0x20 | |
709 | stvx v24,$x00,$key_ # off-load round[1] | |
710 | ?vperm v25,v31,v30,$keyperm | |
711 | lvx v31,$x00,$key | |
712 | stvx v25,$x10,$key_ # off-load round[2] | |
713 | addi $key_,$key_,0x20 | |
714 | bdnz Load_cbc_dec_key | |
715 | ||
716 | lvx v26,$x10,$key | |
717 | ?vperm v24,v30,v31,$keyperm | |
718 | lvx v27,$x20,$key | |
719 | stvx v24,$x00,$key_ # off-load round[3] | |
720 | ?vperm v25,v31,v26,$keyperm | |
721 | lvx v28,$x30,$key | |
722 | stvx v25,$x10,$key_ # off-load round[4] | |
723 | addi $key_,$sp,$FRAME+15 # rewind $key_ | |
724 | ?vperm v26,v26,v27,$keyperm | |
725 | lvx v29,$x40,$key | |
726 | ?vperm v27,v27,v28,$keyperm | |
727 | lvx v30,$x50,$key | |
728 | ?vperm v28,v28,v29,$keyperm | |
729 | lvx v31,$x60,$key | |
730 | ?vperm v29,v29,v30,$keyperm | |
731 | lvx $out0,$x70,$key # borrow $out0 | |
732 | ?vperm v30,v30,v31,$keyperm | |
733 | lvx v24,$x00,$key_ # pre-load round[1] | |
734 | ?vperm v31,v31,$out0,$keyperm | |
735 | lvx v25,$x10,$key_ # pre-load round[2] | |
736 | ||
737 | #lvx $inptail,0,$inp # "caller" already did this | |
738 | #addi $inp,$inp,15 # 15 is not typo | |
739 | subi $inp,$inp,15 # undo "caller" | |
740 | ||
741 | le?li $idx,8 | |
742 | lvx_u $in0,$x00,$inp # load first 8 "words" | |
743 | le?lvsl $inpperm,0,$idx | |
744 | le?vspltisb $tmp,0x0f | |
745 | lvx_u $in1,$x10,$inp | |
746 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | |
747 | lvx_u $in2,$x20,$inp | |
748 | le?vperm $in0,$in0,$in0,$inpperm | |
749 | lvx_u $in3,$x30,$inp | |
750 | le?vperm $in1,$in1,$in1,$inpperm | |
751 | lvx_u $in4,$x40,$inp | |
752 | le?vperm $in2,$in2,$in2,$inpperm | |
753 | vxor $out0,$in0,$rndkey0 | |
754 | lvx_u $in5,$x50,$inp | |
755 | le?vperm $in3,$in3,$in3,$inpperm | |
756 | vxor $out1,$in1,$rndkey0 | |
757 | lvx_u $in6,$x60,$inp | |
758 | le?vperm $in4,$in4,$in4,$inpperm | |
759 | vxor $out2,$in2,$rndkey0 | |
760 | lvx_u $in7,$x70,$inp | |
761 | addi $inp,$inp,0x80 | |
762 | le?vperm $in5,$in5,$in5,$inpperm | |
763 | vxor $out3,$in3,$rndkey0 | |
764 | le?vperm $in6,$in6,$in6,$inpperm | |
765 | vxor $out4,$in4,$rndkey0 | |
766 | le?vperm $in7,$in7,$in7,$inpperm | |
767 | vxor $out5,$in5,$rndkey0 | |
768 | vxor $out6,$in6,$rndkey0 | |
769 | vxor $out7,$in7,$rndkey0 | |
770 | ||
771 | mtctr $rounds | |
772 | b Loop_cbc_dec8x | |
773 | .align 5 | |
774 | Loop_cbc_dec8x: | |
775 | vncipher $out0,$out0,v24 | |
776 | vncipher $out1,$out1,v24 | |
777 | vncipher $out2,$out2,v24 | |
778 | vncipher $out3,$out3,v24 | |
779 | vncipher $out4,$out4,v24 | |
780 | vncipher $out5,$out5,v24 | |
781 | vncipher $out6,$out6,v24 | |
782 | vncipher $out7,$out7,v24 | |
783 | lvx v24,$x20,$key_ # round[3] | |
784 | addi $key_,$key_,0x20 | |
785 | ||
786 | vncipher $out0,$out0,v25 | |
787 | vncipher $out1,$out1,v25 | |
788 | vncipher $out2,$out2,v25 | |
789 | vncipher $out3,$out3,v25 | |
790 | vncipher $out4,$out4,v25 | |
791 | vncipher $out5,$out5,v25 | |
792 | vncipher $out6,$out6,v25 | |
793 | vncipher $out7,$out7,v25 | |
794 | lvx v25,$x10,$key_ # round[4] | |
795 | bdnz Loop_cbc_dec8x | |
796 | ||
797 | subic $len,$len,128 # $len-=128 | |
798 | vncipher $out0,$out0,v24 | |
799 | vncipher $out1,$out1,v24 | |
800 | vncipher $out2,$out2,v24 | |
801 | vncipher $out3,$out3,v24 | |
802 | vncipher $out4,$out4,v24 | |
803 | vncipher $out5,$out5,v24 | |
804 | vncipher $out6,$out6,v24 | |
805 | vncipher $out7,$out7,v24 | |
806 | ||
807 | subfe. r0,r0,r0 # borrow?-1:0 | |
808 | vncipher $out0,$out0,v25 | |
809 | vncipher $out1,$out1,v25 | |
810 | vncipher $out2,$out2,v25 | |
811 | vncipher $out3,$out3,v25 | |
812 | vncipher $out4,$out4,v25 | |
813 | vncipher $out5,$out5,v25 | |
814 | vncipher $out6,$out6,v25 | |
815 | vncipher $out7,$out7,v25 | |
816 | ||
817 | and r0,r0,$len | |
818 | vncipher $out0,$out0,v26 | |
819 | vncipher $out1,$out1,v26 | |
820 | vncipher $out2,$out2,v26 | |
821 | vncipher $out3,$out3,v26 | |
822 | vncipher $out4,$out4,v26 | |
823 | vncipher $out5,$out5,v26 | |
824 | vncipher $out6,$out6,v26 | |
825 | vncipher $out7,$out7,v26 | |
826 | ||
827 | add $inp,$inp,r0 # $inp is adjusted in such | |
828 | # way that at exit from the | |
829 | # loop inX-in7 are loaded | |
830 | # with last "words" | |
831 | vncipher $out0,$out0,v27 | |
832 | vncipher $out1,$out1,v27 | |
833 | vncipher $out2,$out2,v27 | |
834 | vncipher $out3,$out3,v27 | |
835 | vncipher $out4,$out4,v27 | |
836 | vncipher $out5,$out5,v27 | |
837 | vncipher $out6,$out6,v27 | |
838 | vncipher $out7,$out7,v27 | |
839 | ||
840 | addi $key_,$sp,$FRAME+15 # rewind $key_ | |
841 | vncipher $out0,$out0,v28 | |
842 | vncipher $out1,$out1,v28 | |
843 | vncipher $out2,$out2,v28 | |
844 | vncipher $out3,$out3,v28 | |
845 | vncipher $out4,$out4,v28 | |
846 | vncipher $out5,$out5,v28 | |
847 | vncipher $out6,$out6,v28 | |
848 | vncipher $out7,$out7,v28 | |
849 | lvx v24,$x00,$key_ # re-pre-load round[1] | |
850 | ||
851 | vncipher $out0,$out0,v29 | |
852 | vncipher $out1,$out1,v29 | |
853 | vncipher $out2,$out2,v29 | |
854 | vncipher $out3,$out3,v29 | |
855 | vncipher $out4,$out4,v29 | |
856 | vncipher $out5,$out5,v29 | |
857 | vncipher $out6,$out6,v29 | |
858 | vncipher $out7,$out7,v29 | |
859 | lvx v25,$x10,$key_ # re-pre-load round[2] | |
860 | ||
861 | vncipher $out0,$out0,v30 | |
862 | vxor $ivec,$ivec,v31 # xor with last round key | |
863 | vncipher $out1,$out1,v30 | |
864 | vxor $in0,$in0,v31 | |
865 | vncipher $out2,$out2,v30 | |
866 | vxor $in1,$in1,v31 | |
867 | vncipher $out3,$out3,v30 | |
868 | vxor $in2,$in2,v31 | |
869 | vncipher $out4,$out4,v30 | |
870 | vxor $in3,$in3,v31 | |
871 | vncipher $out5,$out5,v30 | |
872 | vxor $in4,$in4,v31 | |
873 | vncipher $out6,$out6,v30 | |
874 | vxor $in5,$in5,v31 | |
875 | vncipher $out7,$out7,v30 | |
876 | vxor $in6,$in6,v31 | |
877 | ||
878 | vncipherlast $out0,$out0,$ivec | |
879 | vncipherlast $out1,$out1,$in0 | |
880 | lvx_u $in0,$x00,$inp # load next input block | |
881 | vncipherlast $out2,$out2,$in1 | |
882 | lvx_u $in1,$x10,$inp | |
883 | vncipherlast $out3,$out3,$in2 | |
884 | le?vperm $in0,$in0,$in0,$inpperm | |
885 | lvx_u $in2,$x20,$inp | |
886 | vncipherlast $out4,$out4,$in3 | |
887 | le?vperm $in1,$in1,$in1,$inpperm | |
888 | lvx_u $in3,$x30,$inp | |
889 | vncipherlast $out5,$out5,$in4 | |
890 | le?vperm $in2,$in2,$in2,$inpperm | |
891 | lvx_u $in4,$x40,$inp | |
892 | vncipherlast $out6,$out6,$in5 | |
893 | le?vperm $in3,$in3,$in3,$inpperm | |
894 | lvx_u $in5,$x50,$inp | |
895 | vncipherlast $out7,$out7,$in6 | |
896 | le?vperm $in4,$in4,$in4,$inpperm | |
897 | lvx_u $in6,$x60,$inp | |
898 | vmr $ivec,$in7 | |
899 | le?vperm $in5,$in5,$in5,$inpperm | |
900 | lvx_u $in7,$x70,$inp | |
901 | addi $inp,$inp,0x80 | |
902 | ||
903 | le?vperm $out0,$out0,$out0,$inpperm | |
904 | le?vperm $out1,$out1,$out1,$inpperm | |
905 | stvx_u $out0,$x00,$out | |
906 | le?vperm $in6,$in6,$in6,$inpperm | |
907 | vxor $out0,$in0,$rndkey0 | |
908 | le?vperm $out2,$out2,$out2,$inpperm | |
909 | stvx_u $out1,$x10,$out | |
910 | le?vperm $in7,$in7,$in7,$inpperm | |
911 | vxor $out1,$in1,$rndkey0 | |
912 | le?vperm $out3,$out3,$out3,$inpperm | |
913 | stvx_u $out2,$x20,$out | |
914 | vxor $out2,$in2,$rndkey0 | |
915 | le?vperm $out4,$out4,$out4,$inpperm | |
916 | stvx_u $out3,$x30,$out | |
917 | vxor $out3,$in3,$rndkey0 | |
918 | le?vperm $out5,$out5,$out5,$inpperm | |
919 | stvx_u $out4,$x40,$out | |
920 | vxor $out4,$in4,$rndkey0 | |
921 | le?vperm $out6,$out6,$out6,$inpperm | |
922 | stvx_u $out5,$x50,$out | |
923 | vxor $out5,$in5,$rndkey0 | |
924 | le?vperm $out7,$out7,$out7,$inpperm | |
925 | stvx_u $out6,$x60,$out | |
926 | vxor $out6,$in6,$rndkey0 | |
927 | stvx_u $out7,$x70,$out | |
928 | addi $out,$out,0x80 | |
929 | vxor $out7,$in7,$rndkey0 | |
930 | ||
931 | mtctr $rounds | |
932 | beq Loop_cbc_dec8x # did $len-=128 borrow? | |
933 | ||
934 | addic. $len,$len,128 | |
935 | beq Lcbc_dec8x_done | |
936 | nop | |
937 | nop | |
938 | ||
939 | Loop_cbc_dec8x_tail: # up to 7 "words" tail... | |
940 | vncipher $out1,$out1,v24 | |
941 | vncipher $out2,$out2,v24 | |
942 | vncipher $out3,$out3,v24 | |
943 | vncipher $out4,$out4,v24 | |
944 | vncipher $out5,$out5,v24 | |
945 | vncipher $out6,$out6,v24 | |
946 | vncipher $out7,$out7,v24 | |
947 | lvx v24,$x20,$key_ # round[3] | |
948 | addi $key_,$key_,0x20 | |
949 | ||
950 | vncipher $out1,$out1,v25 | |
951 | vncipher $out2,$out2,v25 | |
952 | vncipher $out3,$out3,v25 | |
953 | vncipher $out4,$out4,v25 | |
954 | vncipher $out5,$out5,v25 | |
955 | vncipher $out6,$out6,v25 | |
956 | vncipher $out7,$out7,v25 | |
957 | lvx v25,$x10,$key_ # round[4] | |
958 | bdnz Loop_cbc_dec8x_tail | |
959 | ||
960 | vncipher $out1,$out1,v24 | |
961 | vncipher $out2,$out2,v24 | |
962 | vncipher $out3,$out3,v24 | |
963 | vncipher $out4,$out4,v24 | |
964 | vncipher $out5,$out5,v24 | |
965 | vncipher $out6,$out6,v24 | |
966 | vncipher $out7,$out7,v24 | |
967 | ||
968 | vncipher $out1,$out1,v25 | |
969 | vncipher $out2,$out2,v25 | |
970 | vncipher $out3,$out3,v25 | |
971 | vncipher $out4,$out4,v25 | |
972 | vncipher $out5,$out5,v25 | |
973 | vncipher $out6,$out6,v25 | |
974 | vncipher $out7,$out7,v25 | |
975 | ||
976 | vncipher $out1,$out1,v26 | |
977 | vncipher $out2,$out2,v26 | |
978 | vncipher $out3,$out3,v26 | |
979 | vncipher $out4,$out4,v26 | |
980 | vncipher $out5,$out5,v26 | |
981 | vncipher $out6,$out6,v26 | |
982 | vncipher $out7,$out7,v26 | |
983 | ||
984 | vncipher $out1,$out1,v27 | |
985 | vncipher $out2,$out2,v27 | |
986 | vncipher $out3,$out3,v27 | |
987 | vncipher $out4,$out4,v27 | |
988 | vncipher $out5,$out5,v27 | |
989 | vncipher $out6,$out6,v27 | |
990 | vncipher $out7,$out7,v27 | |
991 | ||
992 | vncipher $out1,$out1,v28 | |
993 | vncipher $out2,$out2,v28 | |
994 | vncipher $out3,$out3,v28 | |
995 | vncipher $out4,$out4,v28 | |
996 | vncipher $out5,$out5,v28 | |
997 | vncipher $out6,$out6,v28 | |
998 | vncipher $out7,$out7,v28 | |
999 | ||
1000 | vncipher $out1,$out1,v29 | |
1001 | vncipher $out2,$out2,v29 | |
1002 | vncipher $out3,$out3,v29 | |
1003 | vncipher $out4,$out4,v29 | |
1004 | vncipher $out5,$out5,v29 | |
1005 | vncipher $out6,$out6,v29 | |
1006 | vncipher $out7,$out7,v29 | |
1007 | ||
1008 | vncipher $out1,$out1,v30 | |
1009 | vxor $ivec,$ivec,v31 # last round key | |
1010 | vncipher $out2,$out2,v30 | |
1011 | vxor $in1,$in1,v31 | |
1012 | vncipher $out3,$out3,v30 | |
1013 | vxor $in2,$in2,v31 | |
1014 | vncipher $out4,$out4,v30 | |
1015 | vxor $in3,$in3,v31 | |
1016 | vncipher $out5,$out5,v30 | |
1017 | vxor $in4,$in4,v31 | |
1018 | vncipher $out6,$out6,v30 | |
1019 | vxor $in5,$in5,v31 | |
1020 | vncipher $out7,$out7,v30 | |
1021 | vxor $in6,$in6,v31 | |
1022 | ||
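# Tail dispatch: the remaining byte count at this point covers one to
# seven of the eight pipelined blocks; the compare ladder below selects
# how many of them to finalize with vncipherlast and store.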
1023 | cmplwi $len,32 # switch($len) | |
1024 | blt Lcbc_dec8x_one | |
1025 | nop | |
1026 | beq Lcbc_dec8x_two | |
1027 | cmplwi $len,64 | |
1028 | blt Lcbc_dec8x_three | |
1029 | nop | |
1030 | beq Lcbc_dec8x_four | |
1031 | cmplwi $len,96 | |
1032 | blt Lcbc_dec8x_five | |
1033 | nop | |
1034 | beq Lcbc_dec8x_six | |
1035 | ||
1036 | Lcbc_dec8x_seven: | |
1037 | vncipherlast $out1,$out1,$ivec | |
1038 | vncipherlast $out2,$out2,$in1 | |
1039 | vncipherlast $out3,$out3,$in2 | |
1040 | vncipherlast $out4,$out4,$in3 | |
1041 | vncipherlast $out5,$out5,$in4 | |
1042 | vncipherlast $out6,$out6,$in5 | |
1043 | vncipherlast $out7,$out7,$in6 | |
1044 | vmr $ivec,$in7 | |
1045 | ||
1046 | le?vperm $out1,$out1,$out1,$inpperm | |
1047 | le?vperm $out2,$out2,$out2,$inpperm | |
1048 | stvx_u $out1,$x00,$out | |
1049 | le?vperm $out3,$out3,$out3,$inpperm | |
1050 | stvx_u $out2,$x10,$out | |
1051 | le?vperm $out4,$out4,$out4,$inpperm | |
1052 | stvx_u $out3,$x20,$out | |
1053 | le?vperm $out5,$out5,$out5,$inpperm | |
1054 | stvx_u $out4,$x30,$out | |
1055 | le?vperm $out6,$out6,$out6,$inpperm | |
1056 | stvx_u $out5,$x40,$out | |
1057 | le?vperm $out7,$out7,$out7,$inpperm | |
1058 | stvx_u $out6,$x50,$out | |
1059 | stvx_u $out7,$x60,$out | |
1060 | addi $out,$out,0x70 | |
1061 | b Lcbc_dec8x_done | |
1062 | ||
1063 | .align 5 | |
1064 | Lcbc_dec8x_six: | |
1065 | vncipherlast $out2,$out2,$ivec | |
1066 | vncipherlast $out3,$out3,$in2 | |
1067 | vncipherlast $out4,$out4,$in3 | |
1068 | vncipherlast $out5,$out5,$in4 | |
1069 | vncipherlast $out6,$out6,$in5 | |
1070 | vncipherlast $out7,$out7,$in6 | |
1071 | vmr $ivec,$in7 | |
1072 | ||
1073 | le?vperm $out2,$out2,$out2,$inpperm | |
1074 | le?vperm $out3,$out3,$out3,$inpperm | |
1075 | stvx_u $out2,$x00,$out | |
1076 | le?vperm $out4,$out4,$out4,$inpperm | |
1077 | stvx_u $out3,$x10,$out | |
1078 | le?vperm $out5,$out5,$out5,$inpperm | |
1079 | stvx_u $out4,$x20,$out | |
1080 | le?vperm $out6,$out6,$out6,$inpperm | |
1081 | stvx_u $out5,$x30,$out | |
1082 | le?vperm $out7,$out7,$out7,$inpperm | |
1083 | stvx_u $out6,$x40,$out | |
1084 | stvx_u $out7,$x50,$out | |
1085 | addi $out,$out,0x60 | |
1086 | b Lcbc_dec8x_done | |
1087 | ||
1088 | .align 5 | |
1089 | Lcbc_dec8x_five: | |
1090 | vncipherlast $out3,$out3,$ivec | |
1091 | vncipherlast $out4,$out4,$in3 | |
1092 | vncipherlast $out5,$out5,$in4 | |
1093 | vncipherlast $out6,$out6,$in5 | |
1094 | vncipherlast $out7,$out7,$in6 | |
1095 | vmr $ivec,$in7 | |
1096 | ||
1097 | le?vperm $out3,$out3,$out3,$inpperm | |
1098 | le?vperm $out4,$out4,$out4,$inpperm | |
1099 | stvx_u $out3,$x00,$out | |
1100 | le?vperm $out5,$out5,$out5,$inpperm | |
1101 | stvx_u $out4,$x10,$out | |
1102 | le?vperm $out6,$out6,$out6,$inpperm | |
1103 | stvx_u $out5,$x20,$out | |
1104 | le?vperm $out7,$out7,$out7,$inpperm | |
1105 | stvx_u $out6,$x30,$out | |
1106 | stvx_u $out7,$x40,$out | |
1107 | addi $out,$out,0x50 | |
1108 | b Lcbc_dec8x_done | |
1109 | ||
1110 | .align 5 | |
1111 | Lcbc_dec8x_four: | |
1112 | vncipherlast $out4,$out4,$ivec | |
1113 | vncipherlast $out5,$out5,$in4 | |
1114 | vncipherlast $out6,$out6,$in5 | |
1115 | vncipherlast $out7,$out7,$in6 | |
1116 | vmr $ivec,$in7 | |
1117 | ||
1118 | le?vperm $out4,$out4,$out4,$inpperm | |
1119 | le?vperm $out5,$out5,$out5,$inpperm | |
1120 | stvx_u $out4,$x00,$out | |
1121 | le?vperm $out6,$out6,$out6,$inpperm | |
1122 | stvx_u $out5,$x10,$out | |
1123 | le?vperm $out7,$out7,$out7,$inpperm | |
1124 | stvx_u $out6,$x20,$out | |
1125 | stvx_u $out7,$x30,$out | |
1126 | addi $out,$out,0x40 | |
1127 | b Lcbc_dec8x_done | |
1128 | ||
1129 | .align 5 | |
1130 | Lcbc_dec8x_three: | |
1131 | vncipherlast $out5,$out5,$ivec | |
1132 | vncipherlast $out6,$out6,$in5 | |
1133 | vncipherlast $out7,$out7,$in6 | |
1134 | vmr $ivec,$in7 | |
1135 | ||
1136 | le?vperm $out5,$out5,$out5,$inpperm | |
1137 | le?vperm $out6,$out6,$out6,$inpperm | |
1138 | stvx_u $out5,$x00,$out | |
1139 | le?vperm $out7,$out7,$out7,$inpperm | |
1140 | stvx_u $out6,$x10,$out | |
1141 | stvx_u $out7,$x20,$out | |
1142 | addi $out,$out,0x30 | |
1143 | b Lcbc_dec8x_done | |
1144 | ||
1145 | .align 5 | |
1146 | Lcbc_dec8x_two: | |
1147 | vncipherlast $out6,$out6,$ivec | |
1148 | vncipherlast $out7,$out7,$in6 | |
1149 | vmr $ivec,$in7 | |
1150 | ||
1151 | le?vperm $out6,$out6,$out6,$inpperm | |
1152 | le?vperm $out7,$out7,$out7,$inpperm | |
1153 | stvx_u $out6,$x00,$out | |
1154 | stvx_u $out7,$x10,$out | |
1155 | addi $out,$out,0x20 | |
1156 | b Lcbc_dec8x_done | |
1157 | ||
1158 | .align 5 | |
1159 | Lcbc_dec8x_one: | |
1160 | vncipherlast $out7,$out7,$ivec | |
1161 | vmr $ivec,$in7 | |
1162 | ||
1163 | le?vperm $out7,$out7,$out7,$inpperm | |
1164 | stvx_u $out7,0,$out | |
1165 | addi $out,$out,0x10 | |
1166 | ||
1167 | Lcbc_dec8x_done: | |
1168 | le?vperm $ivec,$ivec,$ivec,$inpperm | |
1169 | stvx_u $ivec,0,$ivp # write [unaligned] iv | |
1170 | ||
1171 | li r10,`$FRAME+15` | |
1172 | li r11,`$FRAME+31` | |
1173 | stvx $inpperm,r10,$sp # wipe copies of round keys | |
1174 | addi r10,r10,32 | |
1175 | stvx $inpperm,r11,$sp | |
1176 | addi r11,r11,32 | |
1177 | stvx $inpperm,r10,$sp | |
1178 | addi r10,r10,32 | |
1179 | stvx $inpperm,r11,$sp | |
1180 | addi r11,r11,32 | |
1181 | stvx $inpperm,r10,$sp | |
1182 | addi r10,r10,32 | |
1183 | stvx $inpperm,r11,$sp | |
1184 | addi r11,r11,32 | |
1185 | stvx $inpperm,r10,$sp | |
1186 | addi r10,r10,32 | |
1187 | stvx $inpperm,r11,$sp | |
1188 | addi r11,r11,32 | |
1189 | ||
1190 | mtspr 256,$vrsave | |
1191 | lvx v20,r10,$sp # ABI says so | |
1192 | addi r10,r10,32 | |
1193 | lvx v21,r11,$sp | |
1194 | addi r11,r11,32 | |
1195 | lvx v22,r10,$sp | |
1196 | addi r10,r10,32 | |
1197 | lvx v23,r11,$sp | |
1198 | addi r11,r11,32 | |
1199 | lvx v24,r10,$sp | |
1200 | addi r10,r10,32 | |
1201 | lvx v25,r11,$sp | |
1202 | addi r11,r11,32 | |
1203 | lvx v26,r10,$sp | |
1204 | addi r10,r10,32 | |
1205 | lvx v27,r11,$sp | |
1206 | addi r11,r11,32 | |
1207 | lvx v28,r10,$sp | |
1208 | addi r10,r10,32 | |
1209 | lvx v29,r11,$sp | |
1210 | addi r11,r11,32 | |
1211 | lvx v30,r10,$sp | |
1212 | lvx v31,r11,$sp | |
1213 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | |
1214 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | |
1215 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | |
1216 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | |
1217 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | |
1218 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | |
1219 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | |
1220 | blr | |
1221 | .long 0 | |
1222 | .byte 0,12,0x14,0,0x80,6,6,0 | |
1223 | .long 0 | |
1224 | .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt | |
1225 | ___ | |
1226 | }} }}} | |
1227 | ||
1228 | ######################################################################### | |
1229 | {{{ # CTR procedure[s] # | |
1230 | my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); | |
1231 | my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); | |
1232 | my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= | |
1233 | map("v$_",(4..11)); | |
1234 | my $dat=$tmp; | |
1235 | ||
1236 | $code.=<<___; | |
1237 | .globl .${prefix}_ctr32_encrypt_blocks | |
1238 | ${UCMP}i $len,1 |
1239 | bltlr- | |
1240 | ||
1241 | lis r0,0xfff0 | |
1242 | mfspr $vrsave,256 | |
1243 | mtspr 256,r0 | |
1244 | ||
1245 | li $idx,15 | |
1246 | vxor $rndkey0,$rndkey0,$rndkey0 | |
1247 | le?vspltisb $tmp,0x0f | |
1248 | ||
1249 | lvx $ivec,0,$ivp # load [unaligned] iv | |
1250 | lvsl $inpperm,0,$ivp | |
1251 | lvx $inptail,$idx,$ivp | |
1252 | vspltisb $one,1 | |
1253 | le?vxor $inpperm,$inpperm,$tmp | |
1254 | vperm $ivec,$ivec,$inptail,$inpperm | |
1255 | vsldoi $one,$rndkey0,$one,1 | |
1256 | ||
1257 | neg r11,$inp | |
1258 | ?lvsl $keyperm,0,$key # prepare for unaligned key | |
1259 | lwz $rounds,240($key) | |
1260 | ||
1261 | lvsr $inpperm,0,r11 # prepare for unaligned load | |
1262 | lvx $inptail,0,$inp | |
1263 | addi $inp,$inp,15 # 15 is not typo | |
1264 | le?vxor $inpperm,$inpperm,$tmp | |
1265 | ||
1266 | srwi $rounds,$rounds,1 | |
1267 | li $idx,16 | |
1268 | subi $rounds,$rounds,1 | |
1269 | ||
1270 | ${UCMP}i $len,8 | |
1271 | bge _aesp8_ctr32_encrypt8x | |
1272 | ||
1273 | ?lvsr $outperm,0,$out # prepare for unaligned store | |
1274 | vspltisb $outmask,-1 | |
1275 | lvx $outhead,0,$out | |
1276 | ?vperm $outmask,$rndkey0,$outmask,$outperm | |
1277 | le?vxor $outperm,$outperm,$tmp | |
1278 | ||
1279 | lvx $rndkey0,0,$key | |
1280 | mtctr $rounds | |
1281 | lvx $rndkey1,$idx,$key | |
1282 | addi $idx,$idx,16 | |
1283 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
1284 | vxor $inout,$ivec,$rndkey0 | |
1285 | lvx $rndkey0,$idx,$key | |
1286 | addi $idx,$idx,16 | |
1287 | b Loop_ctr32_enc | |
1288 | ||
1289 | .align 5 | |
1290 | Loop_ctr32_enc: | |
1291 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
1292 | vcipher $inout,$inout,$rndkey1 | |
1293 | lvx $rndkey1,$idx,$key | |
1294 | addi $idx,$idx,16 | |
1295 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
1296 | vcipher $inout,$inout,$rndkey0 | |
1297 | lvx $rndkey0,$idx,$key | |
1298 | addi $idx,$idx,16 | |
1299 | bdnz Loop_ctr32_enc | |
1300 | ||
1301 | vadduwm $ivec,$ivec,$one | |
1302 | vmr $dat,$inptail | |
1303 | lvx $inptail,0,$inp | |
1304 | addi $inp,$inp,16 | |
1305 | subic. $len,$len,1 # blocks-- | |
1306 | ||
1307 | ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm | |
1308 | vcipher $inout,$inout,$rndkey1 | |
1309 | lvx $rndkey1,$idx,$key | |
1310 | vperm $dat,$dat,$inptail,$inpperm | |
1311 | li $idx,16 | |
1312 | ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm | |
1313 | lvx $rndkey0,0,$key | |
1314 | vxor $dat,$dat,$rndkey1 # last round key | |
1315 | vcipherlast $inout,$inout,$dat | |
1316 | ||
1317 | lvx $rndkey1,$idx,$key | |
1318 | addi $idx,$idx,16 | |
1319 | vperm $inout,$inout,$inout,$outperm | |
1320 | vsel $dat,$outhead,$inout,$outmask | |
1321 | mtctr $rounds | |
1322 | ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm | |
1323 | vmr $outhead,$inout | |
1324 | vxor $inout,$ivec,$rndkey0 | |
1325 | lvx $rndkey0,$idx,$key | |
1326 | addi $idx,$idx,16 | |
1327 | stvx $dat,0,$out | |
1328 | addi $out,$out,16 | |
1329 | bne Loop_ctr32_enc | |
1330 | ||
1331 | addi $out,$out,-1 | |
1332 | lvx $inout,0,$out # redundant in aligned case | |
1333 | vsel $inout,$outhead,$inout,$outmask | |
1334 | stvx $inout,0,$out | |
1335 | ||
1336 | mtspr 256,$vrsave | |
1337 | blr | |
1338 | .long 0 | |
1339 | .byte 0,12,0x14,0,0,0,6,0 | |
1340 | .long 0 | |
1341 | ___ | |
1342 | ######################################################################### | |
1343 | {{ # Optimized CTR procedure # | |
1344 | my $key_="r11"; | |
1345 | my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); | |
1346 | my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); | |
1347 | my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); | |
1348 | my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys |
1349 | # v26-v31 last 6 round keys | |
1350 | my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment | |
1351 | my ($two,$three,$four)=($outhead,$outperm,$outmask); | |
1352 | ||
1353 | $code.=<<___; | |
1354 | .align 5 | |
1355 | _aesp8_ctr32_encrypt8x: | |
1356 | $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) | |
1357 | li r10,`$FRAME+8*16+15` | |
1358 | li r11,`$FRAME+8*16+31` | |
1359 | stvx v20,r10,$sp # ABI says so | |
1360 | addi r10,r10,32 | |
1361 | stvx v21,r11,$sp | |
1362 | addi r11,r11,32 | |
1363 | stvx v22,r10,$sp | |
1364 | addi r10,r10,32 | |
1365 | stvx v23,r11,$sp | |
1366 | addi r11,r11,32 | |
1367 | stvx v24,r10,$sp | |
1368 | addi r10,r10,32 | |
1369 | stvx v25,r11,$sp | |
1370 | addi r11,r11,32 | |
1371 | stvx v26,r10,$sp | |
1372 | addi r10,r10,32 | |
1373 | stvx v27,r11,$sp | |
1374 | addi r11,r11,32 | |
1375 | stvx v28,r10,$sp | |
1376 | addi r10,r10,32 | |
1377 | stvx v29,r11,$sp | |
1378 | addi r11,r11,32 | |
1379 | stvx v30,r10,$sp | |
1380 | stvx v31,r11,$sp | |
1381 | li r0,-1 | |
1382 | stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave | |
1383 | li $x10,0x10 | |
1384 | $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) | |
1385 | li $x20,0x20 | |
1386 | $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) | |
1387 | li $x30,0x30 | |
1388 | $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) | |
1389 | li $x40,0x40 | |
1390 | $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) | |
1391 | li $x50,0x50 | |
1392 | $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) | |
1393 | li $x60,0x60 | |
1394 | $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) | |
1395 | li $x70,0x70 | |
1396 | mtspr 256,r0 | |
1397 | ||
1398 | subi $rounds,$rounds,3 # -4 in total | |
1399 | ||
1400 | lvx $rndkey0,$x00,$key # load key schedule | |
1401 | lvx v30,$x10,$key | |
1402 | addi $key,$key,0x20 | |
1403 | lvx v31,$x00,$key | |
1404 | ?vperm $rndkey0,$rndkey0,v30,$keyperm | |
1405 | addi $key_,$sp,$FRAME+15 | |
1406 | mtctr $rounds | |
1407 | ||
1408 | Load_ctr32_enc_key: | |
1409 | ?vperm v24,v30,v31,$keyperm | |
1410 | lvx v30,$x10,$key | |
1411 | addi $key,$key,0x20 | |
1412 | stvx v24,$x00,$key_ # off-load round[1] | |
1413 | ?vperm v25,v31,v30,$keyperm | |
1414 | lvx v31,$x00,$key | |
1415 | stvx v25,$x10,$key_ # off-load round[2] | |
1416 | addi $key_,$key_,0x20 | |
1417 | bdnz Load_ctr32_enc_key | |
1418 | ||
1419 | lvx v26,$x10,$key | |
1420 | ?vperm v24,v30,v31,$keyperm | |
1421 | lvx v27,$x20,$key | |
1422 | stvx v24,$x00,$key_ # off-load round[3] | |
1423 | ?vperm v25,v31,v26,$keyperm | |
1424 | lvx v28,$x30,$key | |
1425 | stvx v25,$x10,$key_ # off-load round[4] | |
1426 | addi $key_,$sp,$FRAME+15 # rewind $key_ | |
1427 | ?vperm v26,v26,v27,$keyperm | |
1428 | lvx v29,$x40,$key | |
1429 | ?vperm v27,v27,v28,$keyperm | |
1430 | lvx v30,$x50,$key | |
1431 | ?vperm v28,v28,v29,$keyperm | |
1432 | lvx v31,$x60,$key | |
1433 | ?vperm v29,v29,v30,$keyperm | |
1434 | lvx $out0,$x70,$key # borrow $out0 | |
1435 | ?vperm v30,v30,v31,$keyperm | |
1436 | lvx v24,$x00,$key_ # pre-load round[1] | |
1437 | ?vperm v31,v31,$out0,$keyperm | |
1438 | lvx v25,$x10,$key_ # pre-load round[2] | |
1439 | ||
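# Counter setup: the "one" constant prepared by the caller is a full
# 128-bit 1, "two" is 1+1, and the vadduqm (quadword add) chain below
# hands the eight pipelined blocks the counter values iv+0 .. iv+7,
# with iv+8 becoming the next starting counter; each block is xored
# with round key zero up front so the first vcipher round can start
# immediately.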
1440 | vadduqm $two,$one,$one |
1441 | subi $inp,$inp,15 # undo "caller" |
1442 | $SHL $len,$len,4 | |
1443 | ||
1444 | vadduqm $out1,$ivec,$one # counter values ... |
1445 | vadduqm $out2,$ivec,$two | |
1446 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] |
1447 | le?li $idx,8 | |
1448 | vadduqm $out3,$out1,$two |
1449 | vxor $out1,$out1,$rndkey0 |
1450 | le?lvsl $inpperm,0,$idx | |
1451 | vadduqm $out4,$out2,$two |
1452 | vxor $out2,$out2,$rndkey0 |
1453 | le?vspltisb $tmp,0x0f | |
1454 | vadduqm $out5,$out3,$two |
1455 | vxor $out3,$out3,$rndkey0 |
1456 | le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u | |
1457 | vadduqm $out6,$out4,$two |
1458 | vxor $out4,$out4,$rndkey0 |
1459 | vadduqm $out7,$out5,$two |
1460 | vxor $out5,$out5,$rndkey0 |
1461 | vadduqm $ivec,$out6,$two # next counter value |
1462 | vxor $out6,$out6,$rndkey0 |
1463 | vxor $out7,$out7,$rndkey0 | |
1464 | ||
1465 | mtctr $rounds | |
1466 | b Loop_ctr32_enc8x | |
1467 | .align 5 | |
1468 | Loop_ctr32_enc8x: | |
1469 | vcipher $out0,$out0,v24 | |
1470 | vcipher $out1,$out1,v24 | |
1471 | vcipher $out2,$out2,v24 | |
1472 | vcipher $out3,$out3,v24 | |
1473 | vcipher $out4,$out4,v24 | |
1474 | vcipher $out5,$out5,v24 | |
1475 | vcipher $out6,$out6,v24 | |
1476 | vcipher $out7,$out7,v24 | |
1477 | Loop_ctr32_enc8x_middle: | |
1478 | lvx v24,$x20,$key_ # round[3] | |
1479 | addi $key_,$key_,0x20 | |
1480 | ||
1481 | vcipher $out0,$out0,v25 | |
1482 | vcipher $out1,$out1,v25 | |
1483 | vcipher $out2,$out2,v25 | |
1484 | vcipher $out3,$out3,v25 | |
1485 | vcipher $out4,$out4,v25 | |
1486 | vcipher $out5,$out5,v25 | |
1487 | vcipher $out6,$out6,v25 | |
1488 | vcipher $out7,$out7,v25 | |
1489 | lvx v25,$x10,$key_ # round[4] | |
1490 | bdnz Loop_ctr32_enc8x | |
1491 | ||
1492 | subic r11,$len,256 # $len-256, borrow $key_ | |
1493 | vcipher $out0,$out0,v24 | |
1494 | vcipher $out1,$out1,v24 | |
1495 | vcipher $out2,$out2,v24 | |
1496 | vcipher $out3,$out3,v24 | |
1497 | vcipher $out4,$out4,v24 | |
1498 | vcipher $out5,$out5,v24 | |
1499 | vcipher $out6,$out6,v24 | |
1500 | vcipher $out7,$out7,v24 | |
1501 | ||
1502 | subfe r0,r0,r0 # borrow?-1:0 | |
1503 | vcipher $out0,$out0,v25 | |
1504 | vcipher $out1,$out1,v25 | |
1505 | vcipher $out2,$out2,v25 | |
1506 | vcipher $out3,$out3,v25 | |
1507 | vcipher $out4,$out4,v25 | |
1508 | vcipher $out5,$out5,v25 | |
1509 | vcipher $out6,$out6,v25 | |
1510 | vcipher $out7,$out7,v25 | |
1511 | ||
1512 | and r0,r0,r11 | |
1513 | addi $key_,$sp,$FRAME+15 # rewind $key_ | |
1514 | vcipher $out0,$out0,v26 | |
1515 | vcipher $out1,$out1,v26 | |
1516 | vcipher $out2,$out2,v26 | |
1517 | vcipher $out3,$out3,v26 | |
1518 | vcipher $out4,$out4,v26 | |
1519 | vcipher $out5,$out5,v26 | |
1520 | vcipher $out6,$out6,v26 | |
1521 | vcipher $out7,$out7,v26 | |
1522 | lvx v24,$x00,$key_ # re-pre-load round[1] | |
1523 | ||
1524 | subic $len,$len,129 # $len-=129 | |
1525 | vcipher $out0,$out0,v27 | |
1526 | addi $len,$len,1 # $len-=128 really | |
1527 | vcipher $out1,$out1,v27 | |
1528 | vcipher $out2,$out2,v27 | |
1529 | vcipher $out3,$out3,v27 | |
1530 | vcipher $out4,$out4,v27 | |
1531 | vcipher $out5,$out5,v27 | |
1532 | vcipher $out6,$out6,v27 | |
1533 | vcipher $out7,$out7,v27 | |
1534 | lvx v25,$x10,$key_ # re-pre-load round[2] | |
1535 | ||
1536 | vcipher $out0,$out0,v28 | |
1537 | lvx_u $in0,$x00,$inp # load input | |
1538 | vcipher $out1,$out1,v28 | |
1539 | lvx_u $in1,$x10,$inp | |
1540 | vcipher $out2,$out2,v28 | |
1541 | lvx_u $in2,$x20,$inp | |
1542 | vcipher $out3,$out3,v28 | |
1543 | lvx_u $in3,$x30,$inp | |
1544 | vcipher $out4,$out4,v28 | |
1545 | lvx_u $in4,$x40,$inp | |
1546 | vcipher $out5,$out5,v28 | |
1547 | lvx_u $in5,$x50,$inp | |
1548 | vcipher $out6,$out6,v28 | |
1549 | lvx_u $in6,$x60,$inp | |
1550 | vcipher $out7,$out7,v28 | |
1551 | lvx_u $in7,$x70,$inp | |
1552 | addi $inp,$inp,0x80 | |
1553 | ||
1554 | vcipher $out0,$out0,v29 | |
1555 | le?vperm $in0,$in0,$in0,$inpperm | |
1556 | vcipher $out1,$out1,v29 | |
1557 | le?vperm $in1,$in1,$in1,$inpperm | |
1558 | vcipher $out2,$out2,v29 | |
1559 | le?vperm $in2,$in2,$in2,$inpperm | |
1560 | vcipher $out3,$out3,v29 | |
1561 | le?vperm $in3,$in3,$in3,$inpperm | |
1562 | vcipher $out4,$out4,v29 | |
1563 | le?vperm $in4,$in4,$in4,$inpperm | |
1564 | vcipher $out5,$out5,v29 | |
1565 | le?vperm $in5,$in5,$in5,$inpperm | |
1566 | vcipher $out6,$out6,v29 | |
1567 | le?vperm $in6,$in6,$in6,$inpperm | |
1568 | vcipher $out7,$out7,v29 | |
1569 | le?vperm $in7,$in7,$in7,$inpperm | |
1570 | ||
1571 | add $inp,$inp,r0 # $inp is adjusted in such | |
1572 | # way that at exit from the | |
1573 | # loop inX-in7 are loaded | |
1574 | # with last "words" | |
1575 | subfe. r0,r0,r0 # borrow?-1:0 | |
1576 | vcipher $out0,$out0,v30 | |
1577 | vxor $in0,$in0,v31 # xor with last round key | |
1578 | vcipher $out1,$out1,v30 | |
1579 | vxor $in1,$in1,v31 | |
1580 | vcipher $out2,$out2,v30 | |
1581 | vxor $in2,$in2,v31 | |
1582 | vcipher $out3,$out3,v30 | |
1583 | vxor $in3,$in3,v31 | |
1584 | vcipher $out4,$out4,v30 | |
1585 | vxor $in4,$in4,v31 | |
1586 | vcipher $out5,$out5,v30 | |
1587 | vxor $in5,$in5,v31 | |
1588 | vcipher $out6,$out6,v30 | |
1589 | vxor $in6,$in6,v31 | |
1590 | vcipher $out7,$out7,v30 | |
1591 | vxor $in7,$in7,v31 | |
1592 | ||
1593 | bne Lctr32_enc8x_break # did $len-129 borrow? | |
1594 | ||
1595 | vcipherlast $in0,$out0,$in0 | |
1596 | vcipherlast $in1,$out1,$in1 | |
1597 | vadduqm $out1,$ivec,$one # counter values ... |
1598 | vcipherlast $in2,$out2,$in2 |
1599 | vadduqm $out2,$ivec,$two |
1600 | vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] |
1601 | vcipherlast $in3,$out3,$in3 | |
1602 | vadduqm $out3,$out1,$two |
1603 | vxor $out1,$out1,$rndkey0 |
1604 | vcipherlast $in4,$out4,$in4 | |
1605 | vadduqm $out4,$out2,$two |
1606 | vxor $out2,$out2,$rndkey0 |
1607 | vcipherlast $in5,$out5,$in5 | |
1608 | vadduqm $out5,$out3,$two |
1609 | vxor $out3,$out3,$rndkey0 |
1610 | vcipherlast $in6,$out6,$in6 | |
1611 | vadduqm $out6,$out4,$two |
1612 | vxor $out4,$out4,$rndkey0 |
1613 | vcipherlast $in7,$out7,$in7 | |
1614 | vadduqm $out7,$out5,$two |
1615 | vxor $out5,$out5,$rndkey0 |
1616 | le?vperm $in0,$in0,$in0,$inpperm | |
1617 | vadduqm $ivec,$out6,$two # next counter value |
1618 | vxor $out6,$out6,$rndkey0 |
1619 | le?vperm $in1,$in1,$in1,$inpperm | |
1620 | vxor $out7,$out7,$rndkey0 | |
1621 | mtctr $rounds | |
1622 | ||
1623 | vcipher $out0,$out0,v24 | |
1624 | stvx_u $in0,$x00,$out | |
1625 | le?vperm $in2,$in2,$in2,$inpperm | |
1626 | vcipher $out1,$out1,v24 | |
1627 | stvx_u $in1,$x10,$out | |
1628 | le?vperm $in3,$in3,$in3,$inpperm | |
1629 | vcipher $out2,$out2,v24 | |
1630 | stvx_u $in2,$x20,$out | |
1631 | le?vperm $in4,$in4,$in4,$inpperm | |
1632 | vcipher $out3,$out3,v24 | |
1633 | stvx_u $in3,$x30,$out | |
1634 | le?vperm $in5,$in5,$in5,$inpperm | |
1635 | vcipher $out4,$out4,v24 | |
1636 | stvx_u $in4,$x40,$out | |
1637 | le?vperm $in6,$in6,$in6,$inpperm | |
1638 | vcipher $out5,$out5,v24 | |
1639 | stvx_u $in5,$x50,$out | |
1640 | le?vperm $in7,$in7,$in7,$inpperm | |
1641 | vcipher $out6,$out6,v24 | |
1642 | stvx_u $in6,$x60,$out | |
1643 | vcipher $out7,$out7,v24 | |
1644 | stvx_u $in7,$x70,$out | |
1645 | addi $out,$out,0x80 | |
1646 | ||
1647 | b Loop_ctr32_enc8x_middle | |
1648 | ||
1649 | .align 5 | |
1650 | Lctr32_enc8x_break: | |
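# On entry the byte counter holds remaining_bytes - 128, i.e. a value
# between -0x70 and 0x00 for the final one to eight blocks; the ladder
# below dispatches on how many of the pipelined blocks still need to be
# finalized and stored.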
1651 | cmpwi $len,-0x60 | |
1652 | blt Lctr32_enc8x_one | |
1653 | nop | |
1654 | beq Lctr32_enc8x_two | |
1655 | cmpwi $len,-0x40 | |
1656 | blt Lctr32_enc8x_three | |
1657 | nop | |
1658 | beq Lctr32_enc8x_four | |
1659 | cmpwi $len,-0x20 | |
1660 | blt Lctr32_enc8x_five | |
1661 | nop | |
1662 | beq Lctr32_enc8x_six | |
1663 | cmpwi $len,0x00 | |
1664 | blt Lctr32_enc8x_seven | |
1665 | ||
1666 | Lctr32_enc8x_eight: | |
1667 | vcipherlast $out0,$out0,$in0 | |
1668 | vcipherlast $out1,$out1,$in1 | |
1669 | vcipherlast $out2,$out2,$in2 | |
1670 | vcipherlast $out3,$out3,$in3 | |
1671 | vcipherlast $out4,$out4,$in4 | |
1672 | vcipherlast $out5,$out5,$in5 | |
1673 | vcipherlast $out6,$out6,$in6 | |
1674 | vcipherlast $out7,$out7,$in7 | |
1675 | ||
1676 | le?vperm $out0,$out0,$out0,$inpperm | |
1677 | le?vperm $out1,$out1,$out1,$inpperm | |
1678 | stvx_u $out0,$x00,$out | |
1679 | le?vperm $out2,$out2,$out2,$inpperm | |
1680 | stvx_u $out1,$x10,$out | |
1681 | le?vperm $out3,$out3,$out3,$inpperm | |
1682 | stvx_u $out2,$x20,$out | |
1683 | le?vperm $out4,$out4,$out4,$inpperm | |
1684 | stvx_u $out3,$x30,$out | |
1685 | le?vperm $out5,$out5,$out5,$inpperm | |
1686 | stvx_u $out4,$x40,$out | |
1687 | le?vperm $out6,$out6,$out6,$inpperm | |
1688 | stvx_u $out5,$x50,$out | |
1689 | le?vperm $out7,$out7,$out7,$inpperm | |
1690 | stvx_u $out6,$x60,$out | |
1691 | stvx_u $out7,$x70,$out | |
1692 | addi $out,$out,0x80 | |
1693 | b Lctr32_enc8x_done | |
1694 | ||
1695 | .align 5 | |
1696 | Lctr32_enc8x_seven: | |
1697 | vcipherlast $out0,$out0,$in1 | |
1698 | vcipherlast $out1,$out1,$in2 | |
1699 | vcipherlast $out2,$out2,$in3 | |
1700 | vcipherlast $out3,$out3,$in4 | |
1701 | vcipherlast $out4,$out4,$in5 | |
1702 | vcipherlast $out5,$out5,$in6 | |
1703 | vcipherlast $out6,$out6,$in7 | |
1704 | ||
1705 | le?vperm $out0,$out0,$out0,$inpperm | |
1706 | le?vperm $out1,$out1,$out1,$inpperm | |
1707 | stvx_u $out0,$x00,$out | |
1708 | le?vperm $out2,$out2,$out2,$inpperm | |
1709 | stvx_u $out1,$x10,$out | |
1710 | le?vperm $out3,$out3,$out3,$inpperm | |
1711 | stvx_u $out2,$x20,$out | |
1712 | le?vperm $out4,$out4,$out4,$inpperm | |
1713 | stvx_u $out3,$x30,$out | |
1714 | le?vperm $out5,$out5,$out5,$inpperm | |
1715 | stvx_u $out4,$x40,$out | |
1716 | le?vperm $out6,$out6,$out6,$inpperm | |
1717 | stvx_u $out5,$x50,$out | |
1718 | stvx_u $out6,$x60,$out | |
1719 | addi $out,$out,0x70 | |
1720 | b Lctr32_enc8x_done | |
1721 | ||
1722 | .align 5 | |
1723 | Lctr32_enc8x_six: | |
1724 | vcipherlast $out0,$out0,$in2 | |
1725 | vcipherlast $out1,$out1,$in3 | |
1726 | vcipherlast $out2,$out2,$in4 | |
1727 | vcipherlast $out3,$out3,$in5 | |
1728 | vcipherlast $out4,$out4,$in6 | |
1729 | vcipherlast $out5,$out5,$in7 | |
1730 | ||
1731 | le?vperm $out0,$out0,$out0,$inpperm | |
1732 | le?vperm $out1,$out1,$out1,$inpperm | |
1733 | stvx_u $out0,$x00,$out | |
1734 | le?vperm $out2,$out2,$out2,$inpperm | |
1735 | stvx_u $out1,$x10,$out | |
1736 | le?vperm $out3,$out3,$out3,$inpperm | |
1737 | stvx_u $out2,$x20,$out | |
1738 | le?vperm $out4,$out4,$out4,$inpperm | |
1739 | stvx_u $out3,$x30,$out | |
1740 | le?vperm $out5,$out5,$out5,$inpperm | |
1741 | stvx_u $out4,$x40,$out | |
1742 | stvx_u $out5,$x50,$out | |
1743 | addi $out,$out,0x60 | |
1744 | b Lctr32_enc8x_done | |
1745 | ||
1746 | .align 5 | |
1747 | Lctr32_enc8x_five: | |
1748 | vcipherlast $out0,$out0,$in3 | |
1749 | vcipherlast $out1,$out1,$in4 | |
1750 | vcipherlast $out2,$out2,$in5 | |
1751 | vcipherlast $out3,$out3,$in6 | |
1752 | vcipherlast $out4,$out4,$in7 | |
1753 | ||
1754 | le?vperm $out0,$out0,$out0,$inpperm | |
1755 | le?vperm $out1,$out1,$out1,$inpperm | |
1756 | stvx_u $out0,$x00,$out | |
1757 | le?vperm $out2,$out2,$out2,$inpperm | |
1758 | stvx_u $out1,$x10,$out | |
1759 | le?vperm $out3,$out3,$out3,$inpperm | |
1760 | stvx_u $out2,$x20,$out | |
1761 | le?vperm $out4,$out4,$out4,$inpperm | |
1762 | stvx_u $out3,$x30,$out | |
1763 | stvx_u $out4,$x40,$out | |
1764 | addi $out,$out,0x50 | |
1765 | b Lctr32_enc8x_done | |
1766 | ||
1767 | .align 5 | |
1768 | Lctr32_enc8x_four: | |
1769 | vcipherlast $out0,$out0,$in4 | |
1770 | vcipherlast $out1,$out1,$in5 | |
1771 | vcipherlast $out2,$out2,$in6 | |
1772 | vcipherlast $out3,$out3,$in7 | |
1773 | ||
1774 | le?vperm $out0,$out0,$out0,$inpperm | |
1775 | le?vperm $out1,$out1,$out1,$inpperm | |
1776 | stvx_u $out0,$x00,$out | |
1777 | le?vperm $out2,$out2,$out2,$inpperm | |
1778 | stvx_u $out1,$x10,$out | |
1779 | le?vperm $out3,$out3,$out3,$inpperm | |
1780 | stvx_u $out2,$x20,$out | |
1781 | stvx_u $out3,$x30,$out | |
1782 | addi $out,$out,0x40 | |
1783 | b Lctr32_enc8x_done | |
1784 | ||
1785 | .align 5 | |
1786 | Lctr32_enc8x_three: | |
1787 | vcipherlast $out0,$out0,$in5 | |
1788 | vcipherlast $out1,$out1,$in6 | |
1789 | vcipherlast $out2,$out2,$in7 | |
1790 | ||
1791 | le?vperm $out0,$out0,$out0,$inpperm | |
1792 | le?vperm $out1,$out1,$out1,$inpperm | |
1793 | stvx_u $out0,$x00,$out | |
1794 | le?vperm $out2,$out2,$out2,$inpperm | |
1795 | stvx_u $out1,$x10,$out | |
1796 | stvx_u $out2,$x20,$out | |
1797 | addi $out,$out,0x30 | |
1798 | b Lctr32_enc8x_done | |
1799 | ||
1800 | .align 5 | |
1801 | Lctr32_enc8x_two: | |
1802 | vcipherlast $out0,$out0,$in6 | |
1803 | vcipherlast $out1,$out1,$in7 | |
1804 | ||
1805 | le?vperm $out0,$out0,$out0,$inpperm | |
1806 | le?vperm $out1,$out1,$out1,$inpperm | |
1807 | stvx_u $out0,$x00,$out | |
1808 | stvx_u $out1,$x10,$out | |
1809 | addi $out,$out,0x20 | |
1810 | b Lctr32_enc8x_done | |
1811 | ||
1812 | .align 5 | |
1813 | Lctr32_enc8x_one: | |
1814 | vcipherlast $out0,$out0,$in7 | |
1815 | ||
1816 | le?vperm $out0,$out0,$out0,$inpperm | |
1817 | stvx_u $out0,0,$out | |
1818 | addi $out,$out,0x10 | |
1819 | ||
1820 | Lctr32_enc8x_done: | |
1821 | li r10,`$FRAME+15` | |
1822 | li r11,`$FRAME+31` | |
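# Wipe pass: eight paired 16-byte stores of the non-secret $inpperm
# vector overwrite the stack slots that held copies of the round keys.
# lvx/stvx ignore the low four bits of the effective address, so the
# +15/+31 offsets address the same aligned slots as +0/+16 would.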
1823 | stvx $inpperm,r10,$sp # wipe copies of round keys | |
1824 | addi r10,r10,32 | |
1825 | stvx $inpperm,r11,$sp | |
1826 | addi r11,r11,32 | |
1827 | stvx $inpperm,r10,$sp | |
1828 | addi r10,r10,32 | |
1829 | stvx $inpperm,r11,$sp | |
1830 | addi r11,r11,32 | |
1831 | stvx $inpperm,r10,$sp | |
1832 | addi r10,r10,32 | |
1833 | stvx $inpperm,r11,$sp | |
1834 | addi r11,r11,32 | |
1835 | stvx $inpperm,r10,$sp | |
1836 | addi r10,r10,32 | |
1837 | stvx $inpperm,r11,$sp | |
1838 | addi r11,r11,32 | |
1839 | ||
1840 | mtspr 256,$vrsave | |
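# v20-v31 are non-volatile under the ABI; reload them and r26-r31 from
# the frame, then release the stack frame and return.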
1841 | lvx v20,r10,$sp # ABI says so | |
1842 | addi r10,r10,32 | |
1843 | lvx v21,r11,$sp | |
1844 | addi r11,r11,32 | |
1845 | lvx v22,r10,$sp | |
1846 | addi r10,r10,32 | |
1847 | lvx v23,r11,$sp | |
1848 | addi r11,r11,32 | |
1849 | lvx v24,r10,$sp | |
1850 | addi r10,r10,32 | |
1851 | lvx v25,r11,$sp | |
1852 | addi r11,r11,32 | |
1853 | lvx v26,r10,$sp | |
1854 | addi r10,r10,32 | |
1855 | lvx v27,r11,$sp | |
1856 | addi r11,r11,32 | |
1857 | lvx v28,r10,$sp | |
1858 | addi r10,r10,32 | |
1859 | lvx v29,r11,$sp | |
1860 | addi r11,r11,32 | |
1861 | lvx v30,r10,$sp | |
1862 | lvx v31,r11,$sp | |
1863 | $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) | |
1864 | $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) | |
1865 | $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) | |
1866 | $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) | |
1867 | $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) | |
1868 | $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) | |
1869 | addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` | |
1870 | blr | |
1871 | .long 0 | |
1872 | .byte 0,12,0x14,0,0x80,6,6,0 | |
1873 | .long 0 | |
1874 | .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks | |
1875 | ___ | |
1876 | }} }}} | |
1877 | ||
1878 | my $consts=1; | |
1879 | foreach(split("\n",$code)) { | |
1880 | s/\`([^\`]*)\`/eval($1)/geo; | |
1881 | ||
1882 | # constants table endian-specific conversion | |
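	# The rcon table above tags each .long/.byte line with '?rev', '?inv'
	# or '?asis'.  Such lines are re-emitted as raw .byte data; on
	# little-endian flavours a '?rev' entry has its byte sequence
	# reversed and a '?inv' entry has every byte XORed with 0xf, while
	# '?asis' data is left untouched (big-endian data passes through in
	# its original order).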
1883 | if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { | |
1884 | my $conv=$3; | |
1885 | my @bytes=(); | |
1886 | ||
1887 | # convert to endian-agnostic format | |
1888 | if ($1 eq "long") { | |
1889 | foreach (split(/,\s*/,$2)) { | |
1890 | my $l = /^0/?oct:int; | |
1891 | push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; | |
1892 | } | |
1893 | } else { | |
1894 | @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); | |
1895 | } | |
1896 | ||
1897 | # little-endian conversion | |
1898 | if ($flavour =~ /le$/o) { | |
1899 | SWITCH: for($conv) { | |
1900 | /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; | |
1901 | /\?rev/ && do { @bytes=reverse(@bytes); last; }; | |
1902 | } | |
1903 | } | |
1904 | ||
1905 | #emit | |
1906 | print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; | |
1907 | next; | |
1908 | } | |
1909 | $consts=0 if (m/Lconsts:/o); # end of table | |
1910 | ||
1911 | # instructions prefixed with '?' are endian-specific and need | |
1912 | # to be adjusted accordingly... | |
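	# For example, on little-endian '?lvsr' assembles as lvsl (and vice
	# versa), the vA/vB operands of a '?vperm' are swapped, a '?vsldoi'
	# has its source registers swapped and its shift count n replaced by
	# 16-n, and a '?vspltw' index i becomes 3-i; 'le?' lines are kept and
	# 'be?' lines turned into comments.  On big-endian the roles of
	# 'le?'/'be?' are reversed and a bare '?' prefix is simply dropped.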
1913 | if ($flavour =~ /le$/o) { # little-endian | |
1914 | s/le\?//o or | |
1915 | s/be\?/#be#/o or | |
1916 | s/\?lvsr/lvsl/o or | |
1917 | s/\?lvsl/lvsr/o or | |
1918 | s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or | |
1919 | s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or | |
1920 | s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; | |
1921 | } else { # big-endian | |
1922 | s/le\?/#le#/o or | |
1923 | s/be\?//o or | |
1924 | s/\?([a-z]+)/$1/o; | |
1925 | } | |
1926 | ||
1927 | print $_,"\n"; | |
1928 | } | |
1929 | ||
1930 | close STDOUT; |