Commit | Line | Data |
---|---|---|
5c380d62 LB |
1 | #!/usr/bin/env perl |
2 | # | |
3 | # ==================================================================== | |
4 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |
5 | # project. The module is, however, dual licensed under OpenSSL and | |
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 | # details see http://www.openssl.org/~appro/cryptogams/. | |
8 | # ==================================================================== | |
9 | # | |
10 | # GHASH for PowerISA v2.07. | |
11 | # | |
12 | # July 2014 | |
13 | # | |
14 | # Accurate performance measurements are problematic, because it's | |
15 | # always a virtualized setup with a possibly throttled processor. | |
16 | # Relative comparison is therefore more informative. This initial | |
17 | # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x | |
18 | # faster than "4-bit" integer-only compiler-generated 64-bit code. | |
19 | # "Initial version" means that there is room for further improvement. | |
20 | ||
21 | $flavour=shift(@ARGV); # 1st argument: flavour string, must contain 64 or 32 | |
22 | $output=shift(@ARGV); # 2nd argument: passed on to ppc-xlate.pl as-is | |
23 | ||
24 | # Set $SIZE_T, $LRSAVE and the $STU/$POP/$PUSH mnemonics according to | |
25 | # whether the flavour string contains 64 or 32; anything else is fatal. | |
26 | if ($flavour =~ /64/) { | |
27 | ($SIZE_T,$LRSAVE)=(8,16); # $LRSAVE is 2*$SIZE_T here | |
28 | ($STU,$POP,$PUSH)=("stdu","ld","std"); | |
29 | } | |
30 | elsif ($flavour =~ /32/) { | |
31 | ($SIZE_T,$LRSAVE)=(4,4); # $LRSAVE equals $SIZE_T here | |
32 | ($STU,$POP,$PUSH)=("stwu","lwz","stw"); | |
33 | } | |
34 | # Neither 64 nor 32 found in the flavour string. | |
35 | else | |
36 | { die "nonsense $flavour"; } | |
37 | # Find ppc-xlate.pl: first next to this script, then under ../../perlasm/. | |
38 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |
39 | ($xlate)=grep { -f $_ } | |
40 | ("${dir}ppc-xlate.pl","${dir}../../perlasm/ppc-xlate.pl"); | |
41 | defined $xlate or die "can't locate ppc-xlate.pl"; | |
42 | # Pipe everything printed to STDOUT through ppc-xlate.pl; low-precedence "or" so that a failed open is fatal instead of being ignored. | |
43 | open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; | |
44 | # Symbolic register names interpolated into the assembly template below. | |
45 | my ($Xip,$Htbl,$inp,$len)=qw(r3 r4 r5 r6); # argument block | |
46 | # Vector registers: v0-v3 first, then v4-v12. | |
47 | my ($Xl,$Xm,$Xh,$IN)=qw(v0 v1 v2 v3); | |
48 | my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map { "v$_" } 4..12; | |
49 | my $vrsave="r12"; # receives the old SPR 256 value via mfspr and restores it via mtspr | |
50 | ||
51 | $code=<<___; | |
52 | .machine "any" | |
53 | ||
54 | .text | |
55 | ||
56 | .globl .gcm_init_p8 | |
5c380d62 LB |
57 | lis r0,0xfff0 |
58 | li r8,0x10 | |
59 | mfspr $vrsave,256 | |
60 | li r9,0x20 | |
61 | mtspr 256,r0 | |
62 | li r10,0x30 | |
63 | lvx_u $H,0,r4 # load H | |
3c5f0ed7 LDSB |
64 | le?xor r7,r7,r7 |
65 | le?addi r7,r7,0x8 # need a vperm start with 08 | |
66 | le?lvsr 5,0,r7 | |
67 | le?vspltisb 6,0x0f | |
68 | le?vxor 5,5,6 # set a b-endian mask | |
69 | le?vperm $H,$H,$H,5 | |
5c380d62 LB |
70 | |
71 | vspltisb $xC2,-16 # 0xf0 | |
72 | vspltisb $t0,1 # one | |
73 | vaddubm $xC2,$xC2,$xC2 # 0xe0 | |
74 | vxor $zero,$zero,$zero | |
75 | vor $xC2,$xC2,$t0 # 0xe1 | |
76 | vsldoi $xC2,$xC2,$zero,15 # 0xe1... | |
77 | vsldoi $t1,$zero,$t0,1 # ...1 | |
78 | vaddubm $xC2,$xC2,$xC2 # 0xc2... | |
79 | vspltisb $t2,7 | |
80 | vor $xC2,$xC2,$t1 # 0xc2....01 | |
81 | vspltb $t1,$H,0 # most significant byte | |
82 | vsl $H,$H,$t0 # H<<=1 | |
83 | vsrab $t1,$t1,$t2 # broadcast carry bit | |
84 | vand $t1,$t1,$xC2 | |
85 | vxor $H,$H,$t1 # twisted H | |
86 | ||
87 | vsldoi $H,$H,$H,8 # twist even more ... | |
88 | vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 | |
89 | vsldoi $Hl,$zero,$H,8 # ... and split | |
90 | vsldoi $Hh,$H,$zero,8 | |
91 | ||
92 | stvx_u $xC2,0,r3 # save pre-computed table | |
93 | stvx_u $Hl,r8,r3 | |
94 | stvx_u $H, r9,r3 | |
95 | stvx_u $Hh,r10,r3 | |
96 | ||
97 | mtspr 256,$vrsave | |
98 | blr | |
99 | .long 0 | |
100 | .byte 0,12,0x14,0,0,0,2,0 | |
101 | .long 0 | |
102 | .size .gcm_init_p8,.-.gcm_init_p8 | |
103 | ||
104 | .globl .gcm_gmult_p8 | |
5c380d62 LB |
105 | lis r0,0xfff8 |
106 | li r8,0x10 | |
107 | mfspr $vrsave,256 | |
108 | li r9,0x20 | |
109 | mtspr 256,r0 | |
110 | li r10,0x30 | |
111 | lvx_u $IN,0,$Xip # load Xi | |
112 | ||
113 | lvx_u $Hl,r8,$Htbl # load pre-computed table | |
114 | le?lvsl $lemask,r0,r0 | |
115 | lvx_u $H, r9,$Htbl | |
116 | le?vspltisb $t0,0x07 | |
117 | lvx_u $Hh,r10,$Htbl | |
118 | le?vxor $lemask,$lemask,$t0 | |
119 | lvx_u $xC2,0,$Htbl | |
120 | le?vperm $IN,$IN,$IN,$lemask | |
121 | vxor $zero,$zero,$zero | |
122 |