Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | |
3 | * | |
4 | * This file is subject to the terms and conditions of the GNU General Public | |
5 | * License. See the file COPYING in the main directory of this archive | |
6 | * for more details. No warranty for anything given at all. | |
7 | */ | |
8 | #include <linux/linkage.h> | |
9 | #include <asm/errno.h> | |
10 | ||
11 | /* | |
12 | * Checksum copy with exception handling. | |
13 | * On exceptions *src_err_ptr or *dst_err_ptr is set to -EFAULT and the | |
14 | * destination is zeroed. | |
15 | * | |
16 | * Input | |
17 | * rdi source | |
18 | * rsi destination | |
19 | * edx len (32bit) | |
20 | * ecx sum (32bit) | |
21 | * r8 src_err_ptr (int) | |
22 | * r9 dst_err_ptr (int) | |
23 | * | |
24 | * Output | |
25 | * eax returned checksum (64bit sum folded to 32 bits). undefined in case of exception. | |
26 | * | |
27 | * Wrappers need to take care of valid exception sum and zeroing. | |
28 | * They also should align source or destination to 8 bytes. | |
29 | */ | |
30 | ||
31 | .macro source /* mark the following user-memory load as faultable */ | |
32 | 10: /* numeric label reused per expansion; 10b = insn after the macro */ | |
33 | .section __ex_table,"a" /* append an exception-table entry */ | |
34 | .align 8 | |
35 | .quad 10b,.Lbad_source /* fault at 10b -> resume at .Lbad_source */ | |
36 | .previous /* back to the text section */ | |
37 | .endm | |
38 | ||
39 | .macro dest /* mark the following store as faultable */ | |
40 | 20: /* labels the next instruction emitted after the macro */ | |
41 | .section __ex_table,"a" /* append an exception-table entry */ | |
42 | .align 8 | |
43 | .quad 20b,.Lbad_dest /* fault at 20b -> resume at .Lbad_dest */ | |
44 | .previous /* back to the text section */ | |
45 | .endm | |
46 | ||
47 | .macro ignore L=.Lignore /* faultable insn whose fault is skipped, resuming at \L */ | |
48 | 30: | |
49 | .section __ex_table,"a" /* append an exception-table entry */ | |
50 | .align 8 | |
51 | .quad 30b,\L /* fault at 30b -> resume at \L (default .Lignore) */ | |
52 | .previous | |
53 | .endm | |
54 | ||
55 | ||
56 | .globl csum_partial_copy_generic | |
57 | .p2align 4 | |
58 | csum_partial_copy_generic: | |
59 | cmpl $3*64,%edx /* NOTE(review): both outcomes fall through to .Lignore; */ | |
60 | jle .Lignore /* dead test, presumably left over from removed prefetch setup */ | |
61 | ||
62 | .Lignore: | |
63 | subq $7*8,%rsp /* frame: 0 src_err 1 dst_err 2 rbx 3 r12 4 r14 5 r13 6 rbp */ | |
64 | movq %rbx,2*8(%rsp) /* save callee-saved registers */ | |
65 | movq %r12,3*8(%rsp) | |
66 | movq %r14,4*8(%rsp) | |
67 | movq %r13,5*8(%rsp) | |
68 | movq %rbp,6*8(%rsp) | |
69 | ||
70 | movq %r8,(%rsp) /* stash src_err_ptr for the fault handlers */ | |
71 | movq %r9,1*8(%rsp) /* stash dst_err_ptr */ | |
72 | ||
73 | movl %ecx,%eax /* eax = incoming 32-bit sum */ | |
74 | movl %edx,%ecx /* ecx = len */ | |
75 | ||
76 | xorl %r9d,%r9d /* r9 = 0, used only to fold CF via adc */ | |
77 | movq %rcx,%r12 | |
78 | ||
79 | shrq $6,%r12 /* r12 = number of whole 64-byte blocks */ | |
80 | jz .Lhandle_tail /* < 64 */ | |
81 | ||
82 | clc /* adc chain below must start carry-free */ | |
83 | ||
84 | /* main loop. clear in 64 byte blocks */ | |
85 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | |
86 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | |
87 | /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ | |
88 | .p2align 4 | |
89 | .Lloop: | |
90 | source | |
91 | movq (%rdi),%rbx | |
92 | source | |
93 | movq 8(%rdi),%r8 | |
94 | source | |
95 | movq 16(%rdi),%r11 | |
96 | source | |
97 | movq 24(%rdi),%rdx | |
98 | ||
99 | source | |
100 | movq 32(%rdi),%r10 | |
101 | source | |
102 | movq 40(%rdi),%rbp | |
103 | source | |
104 | movq 48(%rdi),%r14 | |
105 | source | |
106 | movq 56(%rdi),%r13 | |
107 | ||
108 | ignore 2f /* a faulting prefetch is harmless: just skip it */ | |
109 | prefetcht0 5*64(%rdi) | |
110 | 2: | |
111 | adcq %rbx,%rax /* 64-bit one's-complement accumulate; CF chains on */ | |
112 | adcq %r8,%rax | |
113 | adcq %r11,%rax | |
114 | adcq %rdx,%rax | |
115 | adcq %r10,%rax | |
116 | adcq %rbp,%rax | |
117 | adcq %r14,%rax | |
118 | adcq %r13,%rax | |
119 | ||
120 | decl %r12d /* decl leaves CF intact; ZF consumed by jnz below */ | |
121 | ||
122 | dest | |
123 | movq %rbx,(%rsi) | |
124 | dest | |
125 | movq %r8,8(%rsi) | |
126 | dest | |
127 | movq %r11,16(%rsi) | |
128 | dest | |
129 | movq %rdx,24(%rsi) | |
130 | ||
131 | dest | |
132 | movq %r10,32(%rsi) | |
133 | dest | |
134 | movq %rbp,40(%rsi) | |
135 | dest | |
136 | movq %r14,48(%rsi) | |
137 | dest | |
138 | movq %r13,56(%rsi) | |
139 | ||
140 | 3: /* NOTE(review): label appears unreferenced here - confirm */ | |
141 | ||
142 | leaq 64(%rdi),%rdi /* lea advances pointers without touching CF */ | |
143 | leaq 64(%rsi),%rsi | |
144 | ||
145 | jnz .Lloop | |
146 | ||
147 | adcq %r9,%rax /* fold the carry left by the last adcq */ | |
148 | ||
149 | /* do last up to 56 bytes */ | |
150 | .Lhandle_tail: | |
151 | /* ecx: count */ | |
152 | movl %ecx,%r10d /* r10d = original length, for the sub-8-byte tail */ | |
153 | andl $63,%ecx | |
154 | shrl $3,%ecx /* ecx = remaining whole qwords */ | |
155 | jz .Lfold | |
156 | clc | |
157 | .p2align 4 | |
158 | .Lloop_8: | |
159 | source | |
160 | movq (%rdi),%rbx | |
161 | adcq %rbx,%rax | |
162 | decl %ecx /* preserves CF */ | |
163 | dest | |
164 | movq %rbx,(%rsi) | |
165 | leaq 8(%rsi),%rsi /* preserve carry */ | |
166 | leaq 8(%rdi),%rdi | |
167 | jnz .Lloop_8 | |
168 | adcq %r9,%rax /* add in carry */ | |
169 | ||
170 | .Lfold: | |
171 | /* reduce checksum to 32bits */ | |
172 | movl %eax,%ebx | |
173 | shrq $32,%rax | |
174 | addl %ebx,%eax /* low 32 + high 32 ... */ | |
175 | adcl %r9d,%eax /* ... plus the end-around carry */ | |
176 | ||
177 | /* do last up to 6 bytes */ | |
178 | .Lhandle_7: | |
179 | movl %r10d,%ecx | |
180 | andl $7,%ecx | |
181 | shrl $1,%ecx /* ecx = remaining 16-bit words */ | |
182 | jz .Lhandle_1 | |
183 | movl $2,%edx /* NOTE(review): %edx looks unused from here on - confirm */ | |
184 | xorl %ebx,%ebx | |
185 | clc | |
186 | .p2align 4 | |
187 | .Lloop_1: | |
188 | source | |
189 | movw (%rdi),%bx | |
190 | adcl %ebx,%eax | |
191 | decl %ecx /* preserves CF; ZF for jnz below */ | |
192 | dest /* fix: must directly precede the store it guards (was before decl) */ | |
193 | movw %bx,(%rsi) | |
194 | leaq 2(%rdi),%rdi | |
195 | leaq 2(%rsi),%rsi | |
196 | jnz .Lloop_1 | |
197 | adcl %r9d,%eax /* add in carry */ | |
198 | ||
199 | /* handle last odd byte */ | |
200 | .Lhandle_1: | |
201 | testl $1,%r10d | |
202 | jz .Lende | |
203 | xorl %ebx,%ebx | |
204 | source | |
205 | movb (%rdi),%bl | |
206 | dest | |
207 | movb %bl,(%rsi) | |
208 | addl %ebx,%eax /* little-endian: odd byte lands in the low half */ | |
209 | adcl %r9d,%eax /* carry */ | |
210 | ||
211 | .Lende: | |
212 | movq 2*8(%rsp),%rbx /* restore callee-saved registers */ | |
213 | movq 3*8(%rsp),%r12 | |
214 | movq 4*8(%rsp),%r14 | |
215 | movq 5*8(%rsp),%r13 | |
216 | movq 6*8(%rsp),%rbp | |
217 | addq $7*8,%rsp | |
218 | ret | |
219 | ||
220 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | |
221 | .Lbad_source: | |
222 | movq (%rsp),%rax /* rax = src_err_ptr */ | |
223 | testq %rax,%rax /* NULL means the caller does not want the error */ | |
224 | jz .Lende | |
225 | movl $-EFAULT,(%rax) | |
226 | jmp .Lende | |
227 | ||
228 | .Lbad_dest: | |
229 | movq 8(%rsp),%rax /* rax = dst_err_ptr */ | |
230 | testq %rax,%rax | |
231 | jz .Lende | |
232 | movl $-EFAULT,(%rax) | |
233 | jmp .Lende | |