Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | |
3 | * | |
4 | * This file is subject to the terms and conditions of the GNU General Public | |
5 | * License. See the file COPYING in the main directory of this archive | |
6 | * for more details. No warranty for anything given at all. | |
7 | */ | |
8d379dad JB |
8 | #include <linux/linkage.h> |
9 | #include <asm/dwarf2.h> | |
10 | #include <asm/errno.h> | |
1da177e4 LT |
11 | |
12 | /* | |
13 | * Checksum copy with exception handling. | |
14 | * On an exception the int pointed to by src_err_ptr or dst_err_ptr is | |
15 | * set to -EFAULT, and the destination buffer is zeroed (by the wrappers). | |
16 | * | |
17 | * Input | |
18 | * rdi source | |
19 | * rsi destination | |
20 | * edx len (32bit) | |
21 | * ecx sum (32bit) | |
22 | * r8 src_err_ptr (int) | |
23 | * r9 dst_err_ptr (int) | |
24 | * | |
25 | * Output | |
26 | * rax 64-bit sum, folded to 32 bits in eax on return; undefined in case of exception. | |
27 | * | |
28 | * Wrappers need to take care of valid exception sum and zeroing. | |
29 | * They also should align source or destination to 8 bytes. | |
30 | */ | |
31 | ||
32 | .macro source /* mark the NEXT instruction as a faultable source read */ | |
33 | 10: | |
34 | .section __ex_table,"a" /* (faulting insn, fixup) pair for the trap handler */ | |
35 | .align 8 | |
36 | .quad 10b,.Lbad_source /* a fault at 10b resumes at .Lbad_source */ | |
37 | .previous | |
38 | .endm | |
39 | ||
40 | .macro dest /* mark the NEXT instruction as a faultable destination write */ | |
41 | 20: | |
42 | .section __ex_table,"a" /* (faulting insn, fixup) pair for the trap handler */ | |
43 | .align 8 | |
44 | .quad 20b,.Lbad_dest /* a fault at 20b resumes at .Lbad_dest */ | |
45 | .previous | |
46 | .endm | |
47 | ||
48 | .macro ignore L=.Lignore /* mark the NEXT insn as faultable but harmless; fixup label defaults to .Lignore */ | |
49 | 30: | |
50 | .section __ex_table,"a" | |
51 | .align 8 | |
52 | .quad 30b,\L /* a fault at 30b (e.g. a prefetch) just continues at \L */ | |
53 | .previous | |
54 | .endm | |
55 | ||
56 | ||
8d379dad JB |
57 | ENTRY(csum_partial_copy_generic) |
58 | CFI_STARTPROC | |
1da177e4 LT |
59 | cmpl $3*64,%edx /* NOTE(review): the jle below jumps to its own fall-through, so this pair is a no-op — looks vestigial; confirm intent */ |
60 | jle .Lignore | |
61 | ||
62 | .Lignore: | |
63 | subq $7*8,%rsp /* frame: (%rsp)=src_err_ptr, 1*8=dst_err_ptr, 2*8..6*8=callee-saved regs */ | |
8d379dad | 64 | CFI_ADJUST_CFA_OFFSET 7*8 |
1da177e4 | 65 | movq %rbx,2*8(%rsp) |
8d379dad | 66 | CFI_REL_OFFSET rbx, 2*8 |
1da177e4 | 67 | movq %r12,3*8(%rsp) |
8d379dad | 68 | CFI_REL_OFFSET r12, 3*8 |
1da177e4 | 69 | movq %r14,4*8(%rsp) |
8d379dad | 70 | CFI_REL_OFFSET r14, 4*8 |
1da177e4 | 71 | movq %r13,5*8(%rsp) |
8d379dad | 72 | CFI_REL_OFFSET r13, 5*8 |
1da177e4 | 73 | movq %rbp,6*8(%rsp) |
8d379dad | 74 | CFI_REL_OFFSET rbp, 6*8 |
1da177e4 LT |
75 | |
76 | movq %r8,(%rsp) /* stash src_err_ptr where .Lbad_source can find it */ | |
77 | movq %r9,1*8(%rsp) /* stash dst_err_ptr where .Lbad_dest can find it */ | |
78 | ||
79 | movl %ecx,%eax /* eax = incoming sum (accumulator lives in rax) */ | |
80 | movl %edx,%ecx /* ecx = len */ | |
81 | ||
82 | xorl %r9d,%r9d /* r9 = 0 permanently: adc'ing it folds CF into the sum */ | |
83 | movq %rcx,%r12 | |
84 | ||
85 | shrq $6,%r12 /* r12 = number of whole 64-byte blocks */ | |
86 | jz .Lhandle_tail /* < 64 */ | |
87 | ||
88 | clc /* start the adc chain with a clean carry */ | |
89 | ||
90 | /* main loop. clear in 64 byte blocks */ | |
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | |
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | |
93 | /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ | |
94 | .p2align 4 | |
95 | .Lloop: | |
96 | source | |
97 | movq (%rdi),%rbx | |
98 | source | |
99 | movq 8(%rdi),%r8 | |
100 | source | |
101 | movq 16(%rdi),%r11 | |
102 | source | |
103 | movq 24(%rdi),%rdx | |
104 | ||
105 | source | |
106 | movq 32(%rdi),%r10 | |
107 | source | |
108 | movq 40(%rdi),%rbp | |
109 | source | |
110 | movq 48(%rdi),%r14 | |
111 | source | |
112 | movq 56(%rdi),%r13 | |
113 | ||
114 | ignore 2f /* prefetch may fault past the buffer; just skip it */ | |
115 | prefetcht0 5*64(%rdi) | |
116 | 2: | |
117 | adcq %rbx,%rax /* carry chains through all eight adds */ | |
118 | adcq %r8,%rax | |
119 | adcq %r11,%rax | |
120 | adcq %rdx,%rax | |
121 | adcq %r10,%rax | |
122 | adcq %rbp,%rax | |
123 | adcq %r14,%rax | |
124 | adcq %r13,%rax | |
125 | ||
126 | decl %r12d /* dec preserves CF — the carry must survive into the next adcq round */ | |
127 | ||
128 | dest | |
129 | movq %rbx,(%rsi) | |
130 | dest | |
131 | movq %r8,8(%rsi) | |
132 | dest | |
133 | movq %r11,16(%rsi) | |
134 | dest | |
135 | movq %rdx,24(%rsi) | |
136 | ||
137 | dest | |
138 | movq %r10,32(%rsi) | |
139 | dest | |
140 | movq %rbp,40(%rsi) | |
141 | dest | |
142 | movq %r14,48(%rsi) | |
143 | dest | |
144 | movq %r13,56(%rsi) | |
145 | ||
146 | 3: | |
147 | ||
148 | leaq 64(%rdi),%rdi /* lea advances pointers without clobbering CF/ZF */ | |
149 | leaq 64(%rsi),%rsi | |
150 | ||
151 | jnz .Lloop /* ZF still set by the decl above */ | |
152 | ||
153 | adcq %r9,%rax /* fold the last carry in (r9 is zero) */ | |
154 | ||
155 | /* do last up to 56 bytes */ | |
156 | .Lhandle_tail: | |
157 | /* ecx: count */ | |
158 | movl %ecx,%r10d /* r10d = original len, reused for the sub-8-byte tail */ | |
159 | andl $63,%ecx | |
160 | shrl $3,%ecx /* ecx = remaining whole qwords */ | |
161 | jz .Lfold | |
162 | clc | |
163 | .p2align 4 | |
164 | .Lloop_8: | |
165 | source | |
166 | movq (%rdi),%rbx | |
167 | adcq %rbx,%rax | |
168 | decl %ecx /* preserves CF */ | |
169 | dest | |
170 | movq %rbx,(%rsi) | |
171 | leaq 8(%rsi),%rsi /* preserve carry */ | |
172 | leaq 8(%rdi),%rdi | |
173 | jnz .Lloop_8 | |
174 | adcq %r9,%rax /* add in carry */ | |
175 | ||
176 | .Lfold: | |
177 | /* reduce checksum to 32bits */ | |
178 | movl %eax,%ebx | |
179 | shrq $32,%rax | |
180 | addl %ebx,%eax /* low32 + high32 */ | |
181 | adcl %r9d,%eax /* fold the fold's carry back in */ | |
182 | ||
183 | /* do last up to 6 bytes (16-bit words; the odd byte is handled below) */ | |
184 | .Lhandle_7: | |
185 | movl %r10d,%ecx | |
186 | andl $7,%ecx | |
187 | shrl $1,%ecx /* ecx = remaining 16-bit words */ | |
188 | jz .Lhandle_1 | |
189 | movl $2,%edx /* NOTE(review): edx appears unused after this — looks vestigial; confirm */ | |
190 | xorl %ebx,%ebx /* ebx zeroed so the movw below zero-extends */ | |
191 | clc | |
192 | .p2align 4 | |
193 | .Lloop_1: | |
194 | source | |
195 | movw (%rdi),%bx | |
196 | adcl %ebx,%eax | |
1da177e4 | 197 | decl %ecx |
92ed0223 | 198 | dest |
1da177e4 LT |
199 | movw %bx,(%rsi) |
200 | leaq 2(%rdi),%rdi /* lea: keep CF for the adcl */ | |
201 | leaq 2(%rsi),%rsi | |
202 | jnz .Lloop_1 | |
203 | adcl %r9d,%eax /* add in carry */ | |
204 | ||
205 | /* handle last odd byte */ | |
206 | .Lhandle_1: | |
207 | testl $1,%r10d | |
208 | jz .Lende | |
209 | xorl %ebx,%ebx | |
210 | source | |
211 | movb (%rdi),%bl | |
212 | dest | |
213 | movb %bl,(%rsi) | |
214 | addl %ebx,%eax | |
215 | adcl %r9d,%eax /* carry */ | |
216 | ||
8d379dad | 217 | CFI_REMEMBER_STATE /* snapshot CFI: the exception paths re-enter .Lende with regs still saved */ |
1da177e4 LT |
218 | .Lende: |
219 | movq 2*8(%rsp),%rbx | |
8d379dad | 220 | CFI_RESTORE rbx |
1da177e4 | 221 | movq 3*8(%rsp),%r12 |
8d379dad | 222 | CFI_RESTORE r12 |
1da177e4 | 223 | movq 4*8(%rsp),%r14 |
8d379dad | 224 | CFI_RESTORE r14 |
1da177e4 | 225 | movq 5*8(%rsp),%r13 |
8d379dad | 226 | CFI_RESTORE r13 |
1da177e4 | 227 | movq 6*8(%rsp),%rbp |
8d379dad | 228 | CFI_RESTORE rbp |
1da177e4 | 229 | addq $7*8,%rsp |
8d379dad | 230 | CFI_ADJUST_CFA_OFFSET -7*8 |
1da177e4 | 231 | ret |
8d379dad | 232 | CFI_RESTORE_STATE |
1da177e4 LT |
233 | |
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | |
235 | .Lbad_source: | |
236 | movq (%rsp),%rax /* rax = src_err_ptr */ | |
237 | testq %rax,%rax /* may be NULL: then just return */ | |
238 | jz .Lende | |
239 | movl $-EFAULT,(%rax) | |
240 | jmp .Lende | |
241 | ||
242 | .Lbad_dest: | |
243 | movq 8(%rsp),%rax /* rax = dst_err_ptr */ | |
244 | testq %rax,%rax /* may be NULL: then just return */ | |
245 | jz .Lende | |
246 | movl $-EFAULT,(%rax) | |
247 | jmp .Lende | |
8d379dad JB |
248 | CFI_ENDPROC |
249 | ENDPROC(csum_partial_copy_generic) |