Commit | Line | Data |
---|---|---|
b9f535ff JF |
1 | /*************************************************************************** |
2 | * Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * | |
3 | * * | |
4 | * This program is free software; you can redistribute it and/or modify * | |
5 | * it under the terms of the GNU General Public License as published by * | |
6 | * the Free Software Foundation; either version 2 of the License, or * | |
7 | * (at your option) any later version. * | |
8 | * * | |
9 | * This program is distributed in the hope that it will be useful, * | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | |
12 | * GNU General Public License for more details. * | |
13 | * * | |
14 | * You should have received a copy of the GNU General Public License * | |
15 | * along with this program; if not, write to the * | |
16 | * Free Software Foundation, Inc., * | |
17 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * | |
18 | ***************************************************************************/ | |
19 | ||
20 | .file "twofish-i586-asm.S" | |
21 | .text | |
22 | ||
d3f5188d | 23 | #include <linux/linkage.h> |
b9f535ff JF |
24 | #include <asm/asm-offsets.h> |
25 | ||
3ad2f3fb | 26 | /* return address at 0 */ |
b9f535ff JF |
27 | |
28 | #define in_blk 12 /* stack offset of the input block address argument, relative to %esp at entry (the entry points add 16 for their four pushes) */ | |
29 | #define out_blk 8 /* stack offset of the output block address argument, relative to %esp at entry */ | |
91d41f15 | 30 | #define ctx 4 /* stack offset of the Twofish context structure address argument, relative to %esp at entry */ |
b9f535ff JF |
31 | |
32 | #define a_offset 0 /* byte offset of 32-bit word a in a block; also selects the matching whitening key word */ | |
33 | #define b_offset 4 /* byte offset of word b */ | |
34 | #define c_offset 8 /* byte offset of word c */ | |
35 | #define d_offset 12 /* byte offset of word d */ | |
36 | ||
37 | /* Byte offsets of the members of the crypto context struct (addressed relative to %ebp in the code below) */ | |
38 | ||
39 | #define s0 0 /* S0 array, 256 words (1024 bytes) each; the round macros address it with no displacement */ | |
40 | #define s1 1024 /* S1 array */ | |
41 | #define s2 2048 /* S2 array */ | |
42 | #define s3 3072 /* S3 array */ | |
43 | #define w 4096 /* 8 whitening key words: w+0..w+12 used by input_whitening, w+16..w+28 by output_whitening */ | |
44 | #define k 4128 /* round key words, read as a pair at k+round and k+4+round with round = 0, 8, ..., 15*8 */ | |
45 | ||
46 | /* Register aliases to allow macro substitution: RnD, RnB and RnH name the dword, low byte and second byte of one register, and the round macros pick one by pasting D, B or H onto their Rn argument */ | |
47 | ||
48 | #define R0D %eax | |
49 | #define R0B %al | |
50 | #define R0H %ah | |
51 | ||
52 | #define R1D %ebx | |
53 | #define R1B %bl | |
54 | #define R1H %bh | |
55 | ||
56 | #define R2D %ecx | |
57 | #define R2B %cl | |
58 | #define R2H %ch | |
59 | ||
60 | #define R3D %edx | |
61 | #define R3B %dl | |
62 | #define R3H %dh | |
63 | ||
64 | ||
65 | /* performs input whitening: xors src with the whitening key word at w+offset in the context */ | |
66 | #define input_whitening(src,context,offset)\ | |
67 | xor w+offset(context), src; | |
68 | ||
69 | /* performs output whitening: xors src with the whitening key word at w+16+offset in the context */ | |
70 | #define output_whitening(src,context,offset)\ | |
71 | xor w+16+offset(context), src; | |
72 | ||
73 | /* | |
74 | * One encryption round; %ebp = context address, %esi and %edi are used as scratch and the stack holds one temporary. | |
75 | * a, b: aliases of the registers containing a (rotated 16) and b; on exit a has been rotated right by 16 and b by 31 in total | |
76 | * c: alias of the register containing c; on exit it holds (c xor (t0 + t1 + word at k+round)) rol 15 | |
77 | * d: alias of the register containing d (already rol $1); on exit it holds d xor (t0 + 2*t1 + word at k+4+round) | |
78 | * t0 (built in %esi from a) and t1 (built in d's register from b) each xor four s-box words; operations on a and b are interleaved to increase performance | |
79 | */ | |
80 | #define encrypt_round(a,b,c,d,round)\ | |
81 | push d ## D; /* keep the old d on the stack: its register accumulates t1 */\ | |
82 | movzx b ## B, %edi;\ | |
83 | mov s1(%ebp,%edi,4),d ## D;\ | |
84 | movzx a ## B, %edi;\ | |
85 | mov s2(%ebp,%edi,4),%esi;\ | |
86 | movzx b ## H, %edi;\ | |
87 | ror $16, b ## D;\ | |
88 | xor s2(%ebp,%edi,4),d ## D;\ | |
89 | movzx a ## H, %edi;\ | |
90 | ror $16, a ## D;\ | |
91 | xor s3(%ebp,%edi,4),%esi;\ | |
92 | movzx b ## B, %edi;\ | |
93 | xor s3(%ebp,%edi,4),d ## D;\ | |
94 | movzx a ## B, %edi;\ | |
95 | xor (%ebp,%edi,4), %esi;\ | |
96 | movzx b ## H, %edi;\ | |
97 | ror $15, b ## D;\ | |
98 | xor (%ebp,%edi,4), d ## D;\ | |
99 | movzx a ## H, %edi;\ | |
100 | xor s1(%ebp,%edi,4),%esi;\ | |
101 | pop %edi; /* %edi = old d */\ | |
102 | add d ## D, %esi; /* %esi = t0 + t1 */\ | |
103 | add %esi, d ## D; /* d register = t0 + 2*t1 */\ | |
104 | add k+round(%ebp), %esi;\ | |
105 | xor %esi, c ## D;\ | |
106 | rol $15, c ## D;\ | |
107 | add k+4+round(%ebp),d ## D;\ | |
108 | xor %edi, d ## D; | |
109 | ||
110 | /* | |
111 | * Final encryption round; %ebp = context address, %esi and %edi are used as scratch and the stack holds one temporary. | |
112 | * a, b: aliases of the registers containing a (rotated 16) and b; on exit a has been rotated right by 16 and b by 32 in total (b unchanged) | |
113 | * c: alias of the register containing c; on exit it holds (c xor (t0 + t1 + word at k+round)) ror 1 | |
114 | * d: alias of the register containing d (already rol $1); on exit it holds d xor (t0 + 2*t1 + word at k+4+round) | |
115 | * t0 (built in %esi from a) and t1 (built in d's register from b) each xor four s-box words; operations on a and b are interleaved to increase performance | |
116 | * last round has different rotations for the output preparation: ror $16 instead of ror $15 on b, ror $1 instead of rol $15 on c | |
117 | */ | |
118 | #define encrypt_last_round(a,b,c,d,round)\ | |
119 | push d ## D; /* keep the old d on the stack: its register accumulates t1 */\ | |
120 | movzx b ## B, %edi;\ | |
121 | mov s1(%ebp,%edi,4),d ## D;\ | |
122 | movzx a ## B, %edi;\ | |
123 | mov s2(%ebp,%edi,4),%esi;\ | |
124 | movzx b ## H, %edi;\ | |
125 | ror $16, b ## D;\ | |
126 | xor s2(%ebp,%edi,4),d ## D;\ | |
127 | movzx a ## H, %edi;\ | |
128 | ror $16, a ## D;\ | |
129 | xor s3(%ebp,%edi,4),%esi;\ | |
130 | movzx b ## B, %edi;\ | |
131 | xor s3(%ebp,%edi,4),d ## D;\ | |
132 | movzx a ## B, %edi;\ | |
133 | xor (%ebp,%edi,4), %esi;\ | |
134 | movzx b ## H, %edi;\ | |
135 | ror $16, b ## D;\ | |
136 | xor (%ebp,%edi,4), d ## D;\ | |
137 | movzx a ## H, %edi;\ | |
138 | xor s1(%ebp,%edi,4),%esi;\ | |
139 | pop %edi; /* %edi = old d */\ | |
140 | add d ## D, %esi; /* %esi = t0 + t1 */\ | |
141 | add %esi, d ## D; /* d register = t0 + 2*t1 */\ | |
142 | add k+round(%ebp), %esi;\ | |
143 | xor %esi, c ## D;\ | |
144 | ror $1, c ## D;\ | |
145 | add k+4+round(%ebp),d ## D;\ | |
146 | xor %edi, d ## D; | |
147 | ||
148 | /* | |
149 | * One decryption round; %ebp = context address, %esi and %edi are used as scratch and the stack holds one temporary. | |
150 | * a, b: aliases of the registers containing a and b (rotated 16); on exit a has been rotated right by 31 and b by 16 in total | |
151 | * c: alias of the register containing c (already rol $1); on exit it holds c xor (t0 + t1 + word at k+round) | |
152 | * d: alias of the register containing d; on exit it holds (d xor (t0 + 2*t1 + word at k+4+round)) rol 15 | |
153 | * t0 (built in c's register from a) and t1 (built in %esi from b) each xor four s-box words; operations on a and b are interleaved to increase performance | |
154 | */ | |
155 | #define decrypt_round(a,b,c,d,round)\ | |
156 | push c ## D; /* keep the old c on the stack: its register accumulates t0 */\ | |
157 | movzx a ## B, %edi;\ | |
158 | mov (%ebp,%edi,4), c ## D;\ | |
159 | movzx b ## B, %edi;\ | |
160 | mov s3(%ebp,%edi,4),%esi;\ | |
161 | movzx a ## H, %edi;\ | |
162 | ror $16, a ## D;\ | |
163 | xor s1(%ebp,%edi,4),c ## D;\ | |
164 | movzx b ## H, %edi;\ | |
165 | ror $16, b ## D;\ | |
166 | xor (%ebp,%edi,4), %esi;\ | |
167 | movzx a ## B, %edi;\ | |
168 | xor s2(%ebp,%edi,4),c ## D;\ | |
169 | movzx b ## B, %edi;\ | |
170 | xor s1(%ebp,%edi,4),%esi;\ | |
171 | movzx a ## H, %edi;\ | |
172 | ror $15, a ## D;\ | |
173 | xor s3(%ebp,%edi,4),c ## D;\ | |
174 | movzx b ## H, %edi;\ | |
175 | xor s2(%ebp,%edi,4),%esi;\ | |
176 | pop %edi; /* %edi = old c */\ | |
177 | add %esi, c ## D; /* c register = t0 + t1 */\ | |
178 | add c ## D, %esi; /* %esi = t0 + 2*t1 */\ | |
179 | add k+round(%ebp), c ## D;\ | |
180 | xor %edi, c ## D;\ | |
181 | add k+4+round(%ebp),%esi;\ | |
182 | xor %esi, d ## D;\ | |
183 | rol $15, d ## D; | |
184 | ||
185 | /* | |
186 | * Final decryption round; %ebp = context address, %esi and %edi are used as scratch and the stack holds one temporary. | |
187 | * a, b: aliases of the registers containing a and b (rotated 16); on exit a has been rotated right by 32 in total (a unchanged) and b by 16 | |
188 | * c: alias of the register containing c (already rol $1); on exit it holds c xor (t0 + t1 + word at k+round) | |
189 | * d: alias of the register containing d; on exit it holds (d xor (t0 + 2*t1 + word at k+4+round)) ror 1 | |
190 | * t0 (built in c's register from a) and t1 (built in %esi from b) each xor four s-box words; operations on a and b are interleaved to increase performance | |
191 | * last round has different rotations for the output preparation: ror $16 instead of ror $15 on a, ror $1 instead of rol $15 on d | |
192 | */ | |
193 | #define decrypt_last_round(a,b,c,d,round)\ | |
194 | push c ## D; /* keep the old c on the stack: its register accumulates t0 */\ | |
195 | movzx a ## B, %edi;\ | |
196 | mov (%ebp,%edi,4), c ## D;\ | |
197 | movzx b ## B, %edi;\ | |
198 | mov s3(%ebp,%edi,4),%esi;\ | |
199 | movzx a ## H, %edi;\ | |
200 | ror $16, a ## D;\ | |
201 | xor s1(%ebp,%edi,4),c ## D;\ | |
202 | movzx b ## H, %edi;\ | |
203 | ror $16, b ## D;\ | |
204 | xor (%ebp,%edi,4), %esi;\ | |
205 | movzx a ## B, %edi;\ | |
206 | xor s2(%ebp,%edi,4),c ## D;\ | |
207 | movzx b ## B, %edi;\ | |
208 | xor s1(%ebp,%edi,4),%esi;\ | |
209 | movzx a ## H, %edi;\ | |
210 | ror $16, a ## D;\ | |
211 | xor s3(%ebp,%edi,4),c ## D;\ | |
212 | movzx b ## H, %edi;\ | |
213 | xor s2(%ebp,%edi,4),%esi;\ | |
214 | pop %edi; /* %edi = old c */\ | |
215 | add %esi, c ## D; /* c register = t0 + t1 */\ | |
216 | add c ## D, %esi; /* %esi = t0 + 2*t1 */\ | |
217 | add k+round(%ebp), c ## D;\ | |
218 | xor %edi, c ## D;\ | |
219 | add k+4+round(%ebp),%esi;\ | |
220 | xor %esi, d ## D;\ | |
221 | ror $1, d ## D; | |
222 | ||
d3f5188d | 223 | ENTRY(twofish_enc_blk) /* encrypts one block: reads four words from the in_blk address, writes four words to the out_blk address, returns 1 in %eax; arguments are taken from the stack */ |
b9f535ff JF |
224 | push %ebp /* save registers according to calling convention */ |
225 | push %ebx | |
226 | push %esi | |
227 | push %edi | |
228 | ||
91d41f15 JK |
229 | mov ctx + 16(%esp), %ebp /* abuse the base pointer: %ebp holds the ctx address |
230 | * from here on; +16 skips the four registers pushed above */ | |
3ad2f3fb | 231 | mov in_blk+16(%esp),%edi /* input address in edi */ |
b9f535ff JF |
232 | 
233 | mov (%edi), %eax /* load input words a, b, c, d into %eax, %ebx, %ecx, %edx */ | |
234 | mov b_offset(%edi), %ebx | |
235 | mov c_offset(%edi), %ecx | |
236 | mov d_offset(%edi), %edx | |
237 | input_whitening(%eax,%ebp,a_offset) | |
238 | ror $16, %eax /* encrypt_round takes a rotated by 16 */ | |
239 | input_whitening(%ebx,%ebp,b_offset) | |
240 | input_whitening(%ecx,%ebp,c_offset) | |
241 | input_whitening(%edx,%ebp,d_offset) | |
242 | rol $1, %edx /* encrypt_round takes d already rotated left by 1 */ | |
243 | ||
244 | encrypt_round(R0,R1,R2,R3,0); /* 16 rounds: the register pairs R0,R1 and R2,R3 swap roles every round; the last argument is the byte offset of the round's key pair */ | |
245 | encrypt_round(R2,R3,R0,R1,8); | |
246 | encrypt_round(R0,R1,R2,R3,2*8); | |
247 | encrypt_round(R2,R3,R0,R1,3*8); | |
248 | encrypt_round(R0,R1,R2,R3,4*8); | |
249 | encrypt_round(R2,R3,R0,R1,5*8); | |
250 | encrypt_round(R0,R1,R2,R3,6*8); | |
251 | encrypt_round(R2,R3,R0,R1,7*8); | |
252 | encrypt_round(R0,R1,R2,R3,8*8); | |
253 | encrypt_round(R2,R3,R0,R1,9*8); | |
254 | encrypt_round(R0,R1,R2,R3,10*8); | |
255 | encrypt_round(R2,R3,R0,R1,11*8); | |
256 | encrypt_round(R0,R1,R2,R3,12*8); | |
257 | encrypt_round(R2,R3,R0,R1,13*8); | |
258 | encrypt_round(R0,R1,R2,R3,14*8); | |
259 | encrypt_last_round(R2,R3,R0,R1,15*8); | |
260 | ||
261 | output_whitening(%eax,%ebp,c_offset) /* %eax and %ebx are stored as output words c and d, %ecx and %edx as a and b, so each gets the whitening key of the word it becomes */ | |
262 | output_whitening(%ebx,%ebp,d_offset) | |
263 | output_whitening(%ecx,%ebp,a_offset) | |
264 | output_whitening(%edx,%ebp,b_offset) | |
265 | mov out_blk+16(%esp),%edi; /* output address in edi */ | |
266 | mov %eax, c_offset(%edi) | |
267 | mov %ebx, d_offset(%edi) | |
268 | mov %ecx, (%edi) | |
269 | mov %edx, b_offset(%edi) | |
270 | ||
271 | pop %edi /* restore the saved registers in reverse push order */ | |
272 | pop %esi | |
273 | pop %ebx | |
274 | pop %ebp | |
275 | mov $1, %eax /* return value is always 1 */ | |
276 | ret | |
d3f5188d | 277 | ENDPROC(twofish_enc_blk) |
b9f535ff | 278 | |
d3f5188d | 279 | ENTRY(twofish_dec_blk) /* decrypts one block: reads four words from the in_blk address, writes four words to the out_blk address, returns 1 in %eax; arguments are taken from the stack */ |
b9f535ff JF |
280 | push %ebp /* save registers according to calling convention */ |
281 | push %ebx | |
282 | push %esi | |
283 | push %edi | |
284 | ||
285 | ||
91d41f15 JK |
286 | mov ctx + 16(%esp), %ebp /* abuse the base pointer: %ebp holds the ctx address |
287 | * from here on; +16 skips the four registers pushed above */ | |
3ad2f3fb | 288 | mov in_blk+16(%esp),%edi /* input address in edi */ |
b9f535ff JF |
289 | 
290 | mov (%edi), %eax /* load input words a, b, c, d into %eax, %ebx, %ecx, %edx */ | |
291 | mov b_offset(%edi), %ebx | |
292 | mov c_offset(%edi), %ecx | |
293 | mov d_offset(%edi), %edx | |
294 | output_whitening(%eax,%ebp,a_offset) /* decryption starts by removing the output whitening keys */ | |
295 | output_whitening(%ebx,%ebp,b_offset) | |
296 | ror $16, %ebx /* decrypt_round takes b rotated by 16 */ | |
297 | output_whitening(%ecx,%ebp,c_offset) | |
298 | output_whitening(%edx,%ebp,d_offset) | |
299 | rol $1, %ecx /* decrypt_round takes c already rotated left by 1 */ | |
300 | ||
301 | decrypt_round(R0,R1,R2,R3,15*8); /* 16 rounds with the key pairs in descending order; the register pairs R0,R1 and R2,R3 swap roles every round */ | |
302 | decrypt_round(R2,R3,R0,R1,14*8); | |
303 | decrypt_round(R0,R1,R2,R3,13*8); | |
304 | decrypt_round(R2,R3,R0,R1,12*8); | |
305 | decrypt_round(R0,R1,R2,R3,11*8); | |
306 | decrypt_round(R2,R3,R0,R1,10*8); | |
307 | decrypt_round(R0,R1,R2,R3,9*8); | |
308 | decrypt_round(R2,R3,R0,R1,8*8); | |
309 | decrypt_round(R0,R1,R2,R3,7*8); | |
310 | decrypt_round(R2,R3,R0,R1,6*8); | |
311 | decrypt_round(R0,R1,R2,R3,5*8); | |
312 | decrypt_round(R2,R3,R0,R1,4*8); | |
313 | decrypt_round(R0,R1,R2,R3,3*8); | |
314 | decrypt_round(R2,R3,R0,R1,2*8); | |
315 | decrypt_round(R0,R1,R2,R3,1*8); | |
316 | decrypt_last_round(R2,R3,R0,R1,0); | |
317 | ||
318 | input_whitening(%eax,%ebp,c_offset) /* %eax and %ebx are stored as output words c and d, %ecx and %edx as a and b, so each gets the whitening key of the word it becomes */ | |
319 | input_whitening(%ebx,%ebp,d_offset) | |
320 | input_whitening(%ecx,%ebp,a_offset) | |
321 | input_whitening(%edx,%ebp,b_offset) | |
322 | mov out_blk+16(%esp),%edi; /* output address in edi */ | |
323 | mov %eax, c_offset(%edi) | |
324 | mov %ebx, d_offset(%edi) | |
325 | mov %ecx, (%edi) | |
326 | mov %edx, b_offset(%edi) | |
327 | ||
328 | pop %edi /* restore the saved registers in reverse push order */ | |
329 | pop %esi | |
330 | pop %ebx | |
331 | pop %ebp | |
332 | mov $1, %eax /* return value is always 1 */ | |
333 | ret | |
d3f5188d | 334 | ENDPROC(twofish_dec_blk) |