600a2334 |
1 | /* |
2 | * Intel SHA Extensions optimized implementation of a SHA-256 update function |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or |
5 | * redistributing this file, you may do so under either license. |
6 | * |
7 | * GPL LICENSE SUMMARY |
8 | * |
9 | * Copyright(c) 2015 Intel Corporation. |
10 | * |
11 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of version 2 of the GNU General Public License as |
13 | * published by the Free Software Foundation. |
14 | * |
15 | * This program is distributed in the hope that it will be useful, but |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 | * General Public License for more details. |
19 | * |
20 | * Contact Information: |
21 | * Sean Gulley <sean.m.gulley@intel.com> |
22 | * Tim Chen <tim.c.chen@linux.intel.com> |
23 | * |
24 | * BSD LICENSE |
25 | * |
26 | * Copyright(c) 2015 Intel Corporation. |
27 | * |
28 | * Redistribution and use in source and binary forms, with or without |
29 | * modification, are permitted provided that the following conditions |
30 | * are met: |
31 | * |
32 | * * Redistributions of source code must retain the above copyright |
33 | * notice, this list of conditions and the following disclaimer. |
34 | * * Redistributions in binary form must reproduce the above copyright |
35 | * notice, this list of conditions and the following disclaimer in |
36 | * the documentation and/or other materials provided with the |
37 | * distribution. |
38 | * * Neither the name of Intel Corporation nor the names of its |
39 | * contributors may be used to endorse or promote products derived |
40 | * from this software without specific prior written permission. |
41 | * |
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
53 | * |
54 | */ |
55 | |
56 | #include <linux/linkage.h> |
57 | |
58 | #define DIGEST_PTR %rdi /* 1st arg: hash state, loaded on entry and stored back on exit */ |
59 | #define DATA_PTR %rsi /* 2nd arg: input data; advanced by 64 bytes per block */ |
60 | #define NUM_BLKS %rdx /* 3rd arg: block count; turned in place into the end-of-data pointer */ |
61 | /* General-purpose scratch register (not an argument). */ |
62 | #define SHA256CONSTANTS %rax /* address of the K256 round-constant table */ |
63 | /* Vector working registers. */ |
64 | #define MSG %xmm0 /* message words + round constants; must be %xmm0, the implicit sha256rnds2 operand */ |
65 | #define STATE0 %xmm1 /* hash state words A,B,E,F after the entry shuffle */ |
66 | #define STATE1 %xmm2 /* hash state words C,D,G,H after the entry shuffle */ |
67 | #define MSGTMP0 %xmm3 /* MSGTMP0-3: rolling window of four-word message schedule groups */ |
68 | #define MSGTMP1 %xmm4 |
69 | #define MSGTMP2 %xmm5 |
70 | #define MSGTMP3 %xmm6 |
71 | #define MSGTMP4 %xmm7 /* scratch for palignr results and for the state reordering */ |
72 | /* Loaded once from PSHUFFLE_BYTE_FLIP_MASK before the block loop. */ |
73 | #define SHUF_MASK %xmm8 /* pshufb control applied to each 16 bytes of message data */ |
74 | /* Copies of the state taken at the top of each block. */ |
75 | #define ABEF_SAVE %xmm9 /* added back to STATE0 after rounds 60-63 */ |
76 | #define CDGH_SAVE %xmm10 /* added back to STATE1 after rounds 60-63 */ |
77 | |
78 | /* |
79 | * Intel SHA Extensions optimized implementation of a SHA-256 update function |
80 | * |
81 | * The function takes a pointer to the current hash values, a pointer to the |
82 | * input data, and a number of 64 byte blocks to process. Once all blocks have |
83 | * been processed, the digest buffer is updated with the resulting hash value. |
84 | * The function only processes complete blocks, there is no functionality to |
85 | * store partial blocks. All message padding and hash value initialization must |
86 | * be done outside the update function. |
87 | * |
88 | * The indented lines in the loop are instructions related to rounds processing. |
89 | * The non-indented lines are instructions related to the message schedule. |
90 | * |
91 | * void sha256_ni_transform(uint32_t *digest, const void *data, |
92 | *                          uint32_t numBlocks); |
93 | * digest : pointer to digest |
94 | * data: pointer to input data |
95 | * numBlocks: Number of blocks to process |
96 | */ |
97 | |
98 | .text |
99 | .align 32 |
100 | ENTRY(sha256_ni_transform) |
101 | /* Clobbers %rax, %rdx, %rsi, %xmm0-%xmm10 and flags; no stack frame is set up. */ |
102 | shl $6, NUM_BLKS /* convert to bytes (count * 64) */ |
103 | jz .Ldone_hash /* zero bytes: return without reading or writing the digest */ |
104 | add DATA_PTR, NUM_BLKS /* pointer to end of data; the loop stops when DATA_PTR reaches it */ |
105 | /* NOTE(review): the shift above uses all 64 bits of %rdx although the prototype comment says uint32_t -- confirm callers pass a clean 64-bit count. */ |
106 | /* |
107 | * load initial hash values |
108 | * Need to reorder these appropriately |
109 | * DCBA, HGFE -> ABEF, CDGH |
110 | */ |
111 | movdqu 0*16(DIGEST_PTR), STATE0 /* unaligned load of digest bytes 0-15 */ |
112 | movdqu 1*16(DIGEST_PTR), STATE1 /* unaligned load of digest bytes 16-31 */ |
113 | |
114 | pshufd $0xB1, STATE0, STATE0 /* CDAB */ |
115 | pshufd $0x1B, STATE1, STATE1 /* EFGH */ |
116 | movdqa STATE0, MSGTMP4 /* keep CDAB: the palignr below overwrites STATE0 */ |
117 | palignr $8, STATE1, STATE0 /* ABEF */ |
118 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ |
119 | |
120 | movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK /* aligned load: the mask must sit on a 16-byte boundary */ |
121 | lea K256(%rip), SHA256CONSTANTS /* base address of the round-constant table */ |
122 | /* Each loop iteration hashes one 64-byte block, in sixteen groups of four rounds. */ |
123 | .Lloop0: |
124 | /* Save hash values for addition after rounds */ |
125 | movdqa STATE0, ABEF_SAVE |
126 | movdqa STATE1, CDGH_SAVE |
127 | |
128 | /* Rounds 0-3: load W[0..3] from the block */ |
129 | movdqu 0*16(DATA_PTR), MSG |
130 | pshufb SHUF_MASK, MSG /* reverse the byte order within each 32-bit word */ |
131 | movdqa MSG, MSGTMP0 /* keep W[0..3] for the message schedule */ |
132 | paddd 0*16(SHA256CONSTANTS), MSG /* MSG = W + K for these four rounds */ |
133 | sha256rnds2 STATE0, STATE1 /* two rounds, fed by the low two dwords of MSG (implicit %xmm0) */ |
134 | pshufd $0x0E, MSG, MSG /* bring the high two dwords of MSG down to the low half */ |
135 | sha256rnds2 STATE1, STATE0 /* next two rounds, with the state registers in swapped roles */ |
136 | |
137 | /* Rounds 4-7: load W[4..7] */ |
138 | movdqu 1*16(DATA_PTR), MSG |
139 | pshufb SHUF_MASK, MSG |
140 | movdqa MSG, MSGTMP1 |
141 | paddd 1*16(SHA256CONSTANTS), MSG |
142 | sha256rnds2 STATE0, STATE1 |
143 | pshufd $0x0E, MSG, MSG |
144 | sha256rnds2 STATE1, STATE0 |
145 | sha256msg1 MSGTMP1, MSGTMP0 /* begin W[16..19] in MSGTMP0 */ |
146 | |
147 | /* Rounds 8-11: load W[8..11] */ |
148 | movdqu 2*16(DATA_PTR), MSG |
149 | pshufb SHUF_MASK, MSG |
150 | movdqa MSG, MSGTMP2 |
151 | paddd 2*16(SHA256CONSTANTS), MSG |
152 | sha256rnds2 STATE0, STATE1 |
153 | pshufd $0x0E, MSG, MSG |
154 | sha256rnds2 STATE1, STATE0 |
155 | sha256msg1 MSGTMP2, MSGTMP1 /* begin W[20..23] in MSGTMP1 */ |
156 | |
157 | /* Rounds 12-15: load W[12..15]; finish W[16..19] in MSGTMP0 */ |
158 | movdqu 3*16(DATA_PTR), MSG |
159 | pshufb SHUF_MASK, MSG |
160 | movdqa MSG, MSGTMP3 |
161 | paddd 3*16(SHA256CONSTANTS), MSG |
162 | sha256rnds2 STATE0, STATE1 |
163 | movdqa MSGTMP3, MSGTMP4 |
164 | palignr $4, MSGTMP2, MSGTMP4 /* MSGTMP4 = dwords 1-3 of MSGTMP2, then dword 0 of MSGTMP3 (W[9..12]) */ |
165 | paddd MSGTMP4, MSGTMP0 /* add the W[t-7] terms */ |
166 | sha256msg2 MSGTMP3, MSGTMP0 /* MSGTMP0 = W[16..19] */ |
167 | pshufd $0x0E, MSG, MSG |
168 | sha256rnds2 STATE1, STATE0 |
169 | sha256msg1 MSGTMP3, MSGTMP2 /* begin W[24..27] in MSGTMP2 */ |
170 | |
171 | /* Rounds 16-19: consume MSGTMP0; finish W[20..23] in MSGTMP1; begin W[28..31] in MSGTMP3 */ |
172 | movdqa MSGTMP0, MSG |
173 | paddd 4*16(SHA256CONSTANTS), MSG |
174 | sha256rnds2 STATE0, STATE1 |
175 | movdqa MSGTMP0, MSGTMP4 |
176 | palignr $4, MSGTMP3, MSGTMP4 |
177 | paddd MSGTMP4, MSGTMP1 |
178 | sha256msg2 MSGTMP0, MSGTMP1 |
179 | pshufd $0x0E, MSG, MSG |
180 | sha256rnds2 STATE1, STATE0 |
181 | sha256msg1 MSGTMP0, MSGTMP3 |
182 | |
183 | /* Rounds 20-23: consume MSGTMP1; finish W[24..27] in MSGTMP2; begin W[32..35] in MSGTMP0 */ |
184 | movdqa MSGTMP1, MSG |
185 | paddd 5*16(SHA256CONSTANTS), MSG |
186 | sha256rnds2 STATE0, STATE1 |
187 | movdqa MSGTMP1, MSGTMP4 |
188 | palignr $4, MSGTMP0, MSGTMP4 |
189 | paddd MSGTMP4, MSGTMP2 |
190 | sha256msg2 MSGTMP1, MSGTMP2 |
191 | pshufd $0x0E, MSG, MSG |
192 | sha256rnds2 STATE1, STATE0 |
193 | sha256msg1 MSGTMP1, MSGTMP0 |
194 | |
195 | /* Rounds 24-27: consume MSGTMP2; finish W[28..31] in MSGTMP3; begin W[36..39] in MSGTMP1 */ |
196 | movdqa MSGTMP2, MSG |
197 | paddd 6*16(SHA256CONSTANTS), MSG |
198 | sha256rnds2 STATE0, STATE1 |
199 | movdqa MSGTMP2, MSGTMP4 |
200 | palignr $4, MSGTMP1, MSGTMP4 |
201 | paddd MSGTMP4, MSGTMP3 |
202 | sha256msg2 MSGTMP2, MSGTMP3 |
203 | pshufd $0x0E, MSG, MSG |
204 | sha256rnds2 STATE1, STATE0 |
205 | sha256msg1 MSGTMP2, MSGTMP1 |
206 | |
207 | /* Rounds 28-31: consume MSGTMP3; finish W[32..35] in MSGTMP0; begin W[40..43] in MSGTMP2 */ |
208 | movdqa MSGTMP3, MSG |
209 | paddd 7*16(SHA256CONSTANTS), MSG |
210 | sha256rnds2 STATE0, STATE1 |
211 | movdqa MSGTMP3, MSGTMP4 |
212 | palignr $4, MSGTMP2, MSGTMP4 |
213 | paddd MSGTMP4, MSGTMP0 |
214 | sha256msg2 MSGTMP3, MSGTMP0 |
215 | pshufd $0x0E, MSG, MSG |
216 | sha256rnds2 STATE1, STATE0 |
217 | sha256msg1 MSGTMP3, MSGTMP2 |
218 | |
219 | /* Rounds 32-35: consume MSGTMP0; finish W[36..39] in MSGTMP1; begin W[44..47] in MSGTMP3 */ |
220 | movdqa MSGTMP0, MSG |
221 | paddd 8*16(SHA256CONSTANTS), MSG |
222 | sha256rnds2 STATE0, STATE1 |
223 | movdqa MSGTMP0, MSGTMP4 |
224 | palignr $4, MSGTMP3, MSGTMP4 |
225 | paddd MSGTMP4, MSGTMP1 |
226 | sha256msg2 MSGTMP0, MSGTMP1 |
227 | pshufd $0x0E, MSG, MSG |
228 | sha256rnds2 STATE1, STATE0 |
229 | sha256msg1 MSGTMP0, MSGTMP3 |
230 | |
231 | /* Rounds 36-39: consume MSGTMP1; finish W[40..43] in MSGTMP2; begin W[48..51] in MSGTMP0 */ |
232 | movdqa MSGTMP1, MSG |
233 | paddd 9*16(SHA256CONSTANTS), MSG |
234 | sha256rnds2 STATE0, STATE1 |
235 | movdqa MSGTMP1, MSGTMP4 |
236 | palignr $4, MSGTMP0, MSGTMP4 |
237 | paddd MSGTMP4, MSGTMP2 |
238 | sha256msg2 MSGTMP1, MSGTMP2 |
239 | pshufd $0x0E, MSG, MSG |
240 | sha256rnds2 STATE1, STATE0 |
241 | sha256msg1 MSGTMP1, MSGTMP0 |
242 | |
243 | /* Rounds 40-43: consume MSGTMP2; finish W[44..47] in MSGTMP3; begin W[52..55] in MSGTMP1 */ |
244 | movdqa MSGTMP2, MSG |
245 | paddd 10*16(SHA256CONSTANTS), MSG |
246 | sha256rnds2 STATE0, STATE1 |
247 | movdqa MSGTMP2, MSGTMP4 |
248 | palignr $4, MSGTMP1, MSGTMP4 |
249 | paddd MSGTMP4, MSGTMP3 |
250 | sha256msg2 MSGTMP2, MSGTMP3 |
251 | pshufd $0x0E, MSG, MSG |
252 | sha256rnds2 STATE1, STATE0 |
253 | sha256msg1 MSGTMP2, MSGTMP1 |
254 | |
255 | /* Rounds 44-47: consume MSGTMP3; finish W[48..51] in MSGTMP0; begin W[56..59] in MSGTMP2 */ |
256 | movdqa MSGTMP3, MSG |
257 | paddd 11*16(SHA256CONSTANTS), MSG |
258 | sha256rnds2 STATE0, STATE1 |
259 | movdqa MSGTMP3, MSGTMP4 |
260 | palignr $4, MSGTMP2, MSGTMP4 |
261 | paddd MSGTMP4, MSGTMP0 |
262 | sha256msg2 MSGTMP3, MSGTMP0 |
263 | pshufd $0x0E, MSG, MSG |
264 | sha256rnds2 STATE1, STATE0 |
265 | sha256msg1 MSGTMP3, MSGTMP2 |
266 | |
267 | /* Rounds 48-51: consume MSGTMP0; finish W[52..55] in MSGTMP1; begin W[60..63] in MSGTMP3 */ |
268 | movdqa MSGTMP0, MSG |
269 | paddd 12*16(SHA256CONSTANTS), MSG |
270 | sha256rnds2 STATE0, STATE1 |
271 | movdqa MSGTMP0, MSGTMP4 |
272 | palignr $4, MSGTMP3, MSGTMP4 |
273 | paddd MSGTMP4, MSGTMP1 |
274 | sha256msg2 MSGTMP0, MSGTMP1 |
275 | pshufd $0x0E, MSG, MSG |
276 | sha256rnds2 STATE1, STATE0 |
277 | sha256msg1 MSGTMP0, MSGTMP3 |
278 | |
279 | /* Rounds 52-55: consume MSGTMP1; finish W[56..59] in MSGTMP2; no further sha256msg1 is needed */ |
280 | movdqa MSGTMP1, MSG |
281 | paddd 13*16(SHA256CONSTANTS), MSG |
282 | sha256rnds2 STATE0, STATE1 |
283 | movdqa MSGTMP1, MSGTMP4 |
284 | palignr $4, MSGTMP0, MSGTMP4 |
285 | paddd MSGTMP4, MSGTMP2 |
286 | sha256msg2 MSGTMP1, MSGTMP2 |
287 | pshufd $0x0E, MSG, MSG |
288 | sha256rnds2 STATE1, STATE0 |
289 | |
290 | /* Rounds 56-59: consume MSGTMP2; finish W[60..63] in MSGTMP3 */ |
291 | movdqa MSGTMP2, MSG |
292 | paddd 14*16(SHA256CONSTANTS), MSG |
293 | sha256rnds2 STATE0, STATE1 |
294 | movdqa MSGTMP2, MSGTMP4 |
295 | palignr $4, MSGTMP1, MSGTMP4 |
296 | paddd MSGTMP4, MSGTMP3 |
297 | sha256msg2 MSGTMP2, MSGTMP3 |
298 | pshufd $0x0E, MSG, MSG |
299 | sha256rnds2 STATE1, STATE0 |
300 | |
301 | /* Rounds 60-63: consume MSGTMP3; the message schedule is complete */ |
302 | movdqa MSGTMP3, MSG |
303 | paddd 15*16(SHA256CONSTANTS), MSG |
304 | sha256rnds2 STATE0, STATE1 |
305 | pshufd $0x0E, MSG, MSG |
306 | sha256rnds2 STATE1, STATE0 |
307 | |
308 | /* Add current hash values with previously saved */ |
309 | paddd ABEF_SAVE, STATE0 |
310 | paddd CDGH_SAVE, STATE1 |
311 | |
312 | /* Increment data pointer and loop if more to process */ |
313 | add $64, DATA_PTR |
314 | cmp NUM_BLKS, DATA_PTR /* NUM_BLKS holds the end-of-data pointer computed at entry */ |
315 | jne .Lloop0 |
316 | |
317 | /* Write hash values back in the correct order */ |
318 | pshufd $0x1B, STATE0, STATE0 /* FEBA */ |
319 | pshufd $0xB1, STATE1, STATE1 /* DCHG */ |
320 | movdqa STATE0, MSGTMP4 /* keep FEBA: the pblendw below overwrites STATE0 */ |
321 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ |
322 | palignr $8, MSGTMP4, STATE1 /* HGFE */ |
323 | |
324 | movdqu STATE0, 0*16(DIGEST_PTR) /* unaligned store of digest bytes 0-15 */ |
325 | movdqu STATE1, 1*16(DIGEST_PTR) /* unaligned store of digest bytes 16-31 */ |
326 | |
327 | .Ldone_hash: |
328 | /* Also reached directly from entry when the byte count is zero. */ |
329 | ret |
330 | ENDPROC(sha256_ni_transform) |
331 | |
332 | .section .rodata, "a", @progbits /* read-only: sha256_ni_transform only ever loads from these tables */ |
333 | .align 64 |
334 | K256: /* SHA-256 round constants, four 32-bit words (one 16-byte paddd operand) per row */ |
335 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 |
336 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 |
337 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 |
338 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 |
339 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc |
340 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da |
341 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 |
342 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 |
343 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 |
344 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 |
345 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 |
346 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 |
347 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 |
348 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 |
349 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 |
350 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 |
351 | .align 16 /* the mask is fetched with movdqa, which requires a 16-byte aligned address */ |
352 | PSHUFFLE_BYTE_FLIP_MASK: /* pshufb control that reverses the byte order within each 32-bit word */ |
353 | .octa 0x0c0d0e0f08090a0b0405060700010203 |