Commit | Line | Data |
---|---|---|
2249cbb5 TC |
1 | /* |
2 | * Buffer submit code for multi buffer SHA1 algorithm | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2014 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * Contact Information: | |
21 | * James Guilford <james.guilford@intel.com> | |
22 | * Tim Chen <tim.c.chen@linux.intel.com> | |
23 | * | |
24 | * BSD LICENSE | |
25 | * | |
26 | * Copyright(c) 2014 Intel Corporation. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * | |
32 | * * Redistributions of source code must retain the above copyright | |
33 | * notice, this list of conditions and the following disclaimer. | |
34 | * * Redistributions in binary form must reproduce the above copyright | |
35 | * notice, this list of conditions and the following disclaimer in | |
36 | * the documentation and/or other materials provided with the | |
37 | * distribution. | |
38 | * * Neither the name of Intel Corporation nor the names of its | |
39 | * contributors may be used to endorse or promote products derived | |
40 | * from this software without specific prior written permission. | |
41 | * | |
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
53 | */ | |
54 | ||
55 | #include <linux/linkage.h> | |
56 | #include "sha1_mb_mgr_datastruct.S" | |
57 | ||
58 | ||
/* Eight-lane SHA1 block routine called by the submit code in this file. */
.extern sha1_x8_avx2
60 | ||
/*
 * Register aliases used by sha1_mb_mgr_submit_avx2.
 *
 * Several aliases intentionally name the same physical register; each
 * group below lists every alias that maps onto one register so that
 * overlapping lifetimes are easy to spot.
 */

/* Argument registers (Linux calling convention): %rdi, then %rsi */
arg1			= %rdi
arg2			= %rsi
#define state		arg1
#define job		%rsi
#define len2		arg2
#define p2		arg2

/* %rcx */
size_offset		= %rcx
tmp2			= %rcx

/* %rdx */
extra_blocks		= %rdx

/*
 * %r8: index of the lane being retired.
 * idx must be a register not clobbered by sha1_x8_avx2.
 */
idx			= %r8
DWORD_idx		= %r8d
last_len		= %r8

/* %r11: data buffer pointer */
p			= %r11
start_offset		= %r11

/* %rbx: working copy of the packed unused-lane list */
unused_lanes		= %rbx
BYTE_unused_lanes	= %bl

/* %rax: packed length while submitting, job pointer when returning */
job_rax			= %rax
len			= %rax
DWORD_len		= %eax

/* %r12: lane chosen for the job being submitted */
lane			= %r12
tmp3			= %r12

/* %r9: scratch, carries the fifth digest word */
tmp			= %r9
DWORD_tmp		= %r9d

/* %r10: address of the per-lane bookkeeping record */
lane_data		= %r10
96 | ||
2249cbb5 TC |
/*
 * JOB *sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
 *
 *   arg 1 : %rdi : state
 *   arg 2 : %rsi : job
 *
 * Queues "job" in the lane popped from the low nibble of the packed
 * unused_lanes list, marks it STS_BEING_PROCESSED and copies its
 * length, five digest words and buffer pointer into the per-lane
 * arrays of "state".
 *
 * If the remaining unused_lanes value is anything other than 0xF
 * (presumably the end-of-list marker, i.e. at least one lane is still
 * free -- confirm against the manager init code) the function returns
 * NULL without hashing.  Otherwise it finds the smallest outstanding
 * length, runs sha1_x8_avx2 for that many units unless it is zero,
 * retires the lane that held the minimum and returns that job with its
 * digest written back and its status set to STS_COMPLETED.
 *
 * Returns (%rax): pointer to the completed job, or NULL.
 * Preserves %rbx and %r12 (saved on the stack).  Writes %rsi, %r8,
 * %r9, %r10, %r11 and %xmm0-%xmm3 here, in addition to whatever
 * sha1_x8_avx2 itself clobbers.
 *
 * NOTE(review): two pushes precede the call, so %rsp at the call site
 * has the same 16-byte phase as on entry; confirm sha1_x8_avx2 does
 * not depend on a particular incoming stack alignment.
 */
ENTRY(sha1_mb_mgr_submit_avx2)
	push	%rbx
	push	%r12

	/* Pop the next free lane index from the low nibble of the list */
	mov	_unused_lanes(state), unused_lanes
	mov	unused_lanes, lane
	and	$0xF, lane
	shr	$4, unused_lanes
	imul	$_LANE_DATA_size, lane, lane_data
	movl	$STS_BEING_PROCESSED, _status(job)
	lea	_ldata(state, lane_data), lane_data
	mov	unused_lanes, _unused_lanes(state)
	movl	_len(job), DWORD_len		# 32-bit load zero-extends into len

	/* Record the job and store its length packed as (len << 4) | lane */
	mov	job, _job_in_lane(lane_data)
	shl	$4, len
	or	lane, len

	movl	DWORD_len, _lens(state , lane, 4)

	/*
	 * Load digest words from result_digest and scatter them into the
	 * lane column of args_digest (consecutive words are 32 bytes apart).
	 */
	vmovdqu	_result_digest(job), %xmm0
	mov	_result_digest+1*16(job), DWORD_tmp
	vmovd	%xmm0, _args_digest(state, lane, 4)
	vpextrd	$1, %xmm0, _args_digest+1*32(state , lane, 4)
	vpextrd	$2, %xmm0, _args_digest+2*32(state , lane, 4)
	vpextrd	$3, %xmm0, _args_digest+3*32(state , lane, 4)
	movl	DWORD_tmp, _args_digest+4*32(state , lane, 4)

	mov	_buffer(job), p
	mov	p, _args_data_ptr(state, lane, 8)

	/* Hash only when the list has shrunk to the bare 0xF value */
	cmp	$0xF, unused_lanes
	jne	return_null

start_loop:
	/* Find min length across the eight packed lens entries */
	vmovdqa	_lens(state), %xmm0
	vmovdqa	_lens+1*16(state), %xmm1

	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min value in low dword

	/* Split the winner: low nibble is the lane, the rest is the length */
	vmovd	%xmm2, DWORD_idx
	mov	idx, len2			# len2 is %rsi, job is dead by now
	and	$0xF, idx
	shr	$4, len2
	jz	len_is_0			# nothing left to hash for this lane

	/* Subtract the minimum length (lane bits masked off) from every lane */
	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
	vpshufd	$0, %xmm2, %xmm2

	vpsubd	%xmm2, %xmm0, %xmm0
	vpsubd	%xmm2, %xmm1, %xmm1

	vmovdqa	%xmm0, _lens + 0*16(state)
	vmovdqa	%xmm1, _lens + 1*16(state)


	# "state" and "args" are the same address, arg1
	# len is arg2
	call	sha1_x8_avx2

	# state and idx are intact

len_is_0:
	/* Retire the job held in lane idx */
	imul	$_LANE_DATA_size, idx, lane_data
	lea	_ldata(state, lane_data), lane_data

	mov	_job_in_lane(lane_data), job_rax
	mov	_unused_lanes(state), unused_lanes
	movq	$0, _job_in_lane(lane_data)
	movl	$STS_COMPLETED, _status(job_rax)
	shl	$4, unused_lanes		# push idx back onto the free list
	or	idx, unused_lanes
	mov	unused_lanes, _unused_lanes(state)

	/* Maximum unsigned value keeps the idle lane out of the min search */
	movl	$0xFFFFFFFF, _lens(state, idx, 4)

	/* Gather the five digest words of lane idx back into the job */
	vmovd	_args_digest(state, idx, 4), %xmm0
	vpinsrd	$1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
	vpinsrd	$2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
	vpinsrd	$3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
	/* Use the _args_digest base like the four loads above */
	movl	_args_digest+4*32(state, idx, 4), DWORD_tmp

	vmovdqu	%xmm0, _result_digest(job_rax)
	movl	DWORD_tmp, _result_digest+1*16(job_rax)

return:
	pop	%r12
	pop	%rbx
	ret

return_null:
	xor	job_rax, job_rax		# no job completed: return NULL
	jmp	return

ENDPROC(sha1_mb_mgr_submit_avx2)
202 | ||
/*
 * Mask for the low dword of the packed minimum: clears bits 3:0 (the
 * lane index) and keeps the length bits, so the result can be
 * broadcast and subtracted from every lens entry.  The value is never
 * written, so it lives in a read-only, mergeable 16-byte constant
 * section instead of writable .data.
 */
.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16
.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0