/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This forces the destination out of the cache by using non-temporal
 * stores, which helps performance for large copies whose data will not
 * be read again soon.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * rcx zero flag: when 1, zero the rest of the destination on an exception
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
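/*
 * For reference, a C-level declaration consistent with the register
 * convention documented above (the exact prototype and header are an
 * assumption, not part of this file):
 *
 *      long __copy_user_nocache(void *dst, const void __user *src,
 *                               unsigned size, int zerorest);
 *
 * The return value is the number of bytes left uncopied, 0 on success.
 */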
ENTRY(__copy_user_nocache)
        CFI_STARTPROC
        pushq %rbx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbx, 0
        pushq %rcx                      /* save zero flag */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rcx, 0

        xorl %eax,%eax                  /* zero for the exception handler */

#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

        movq %rdx,%rcx

        movl $64,%ebx
        shrq $6,%rdx
        decq %rdx
        js .Lhandle_tail

        .p2align 4
.Lloop:
.Ls1:   movq (%rsi),%r11
.Ls2:   movq 1*8(%rsi),%r8
.Ls3:   movq 2*8(%rsi),%r9
.Ls4:   movq 3*8(%rsi),%r10
.Ld1:   movnti %r11,(%rdi)
.Ld2:   movnti %r8,1*8(%rdi)
.Ld3:   movnti %r9,2*8(%rdi)
.Ld4:   movnti %r10,3*8(%rdi)

.Ls5:   movq 4*8(%rsi),%r11
.Ls6:   movq 5*8(%rsi),%r8
.Ls7:   movq 6*8(%rsi),%r9
.Ls8:   movq 7*8(%rsi),%r10
.Ld5:   movnti %r11,4*8(%rdi)
.Ld6:   movnti %r8,5*8(%rdi)
.Ld7:   movnti %r9,6*8(%rdi)
.Ld8:   movnti %r10,7*8(%rdi)

        dec %rdx

        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi

        jns .Lloop

        .p2align 4
.Lhandle_tail:
        movl %ecx,%edx
        andl $63,%ecx
        shrl $3,%ecx
        jz .Lhandle_7
        movl $8,%ebx
        .p2align 4
.Lloop_8:
.Ls9:   movq (%rsi),%r8
.Ld9:   movnti %r8,(%rdi)
        decl %ecx
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jnz .Lloop_8

.Lhandle_7:
        movl %edx,%ecx
        andl $7,%ecx
        jz .Lende
        .p2align 4
.Lloop_1:
.Ls10:  movb (%rsi),%bl
.Ld10:  movb %bl,(%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz .Lloop_1

        CFI_REMEMBER_STATE
.Lende:
        popq %rcx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE %rcx
        popq %rbx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rbx
        sfence
        ret
        CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
        /* align destination */
        .p2align 4
.Lbad_alignment:
        movl $8,%r9d
        subl %ecx,%r9d
        movl %r9d,%ecx
        cmpq %r9,%rdx
        jz .Lhandle_7
        js .Lhandle_7
.Lalign_1:
.Ls11:  movb (%rsi),%bl
.Ld11:  movb %bl,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .Lalign_1
        subq %r9,%rdx
        jmp .Lafter_bad_alignment
#endif

        /* table sorted by exception address */
        .section __ex_table,"a"
        .align 8
        .quad .Ls1,.Ls1e
        .quad .Ls2,.Ls2e
        .quad .Ls3,.Ls3e
        .quad .Ls4,.Ls4e
        .quad .Ld1,.Ls1e
        .quad .Ld2,.Ls2e
        .quad .Ld3,.Ls3e
        .quad .Ld4,.Ls4e
        .quad .Ls5,.Ls5e
        .quad .Ls6,.Ls6e
        .quad .Ls7,.Ls7e
        .quad .Ls8,.Ls8e
        .quad .Ld5,.Ls5e
        .quad .Ld6,.Ls6e
        .quad .Ld7,.Ls7e
        .quad .Ld8,.Ls8e
        .quad .Ls9,.Le_quad
        .quad .Ld9,.Le_quad
        .quad .Ls10,.Le_byte
        .quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
        .quad .Ls11,.Lzero_rest
        .quad .Ld11,.Lzero_rest
#endif
        .quad .Le5,.Le_zero
        .previous

        /* Compute the 64-byte offset for the main loop.  Accuracy is 8 bytes,
           with the error on the pessimistic side.  This is gross; it would be
           better to fix the interface. */
        /* eax: zero, ebx: 64 */
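        /* Fall-through trick: the exception table sends a fault at the Nth
           quadword of a block to the Nth addl below, so eax accumulates
           8 bytes for every quadword of the current 64-byte block that was
           not copied before the fault. */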
.Ls1e:  addl $8,%eax
.Ls2e:  addl $8,%eax
.Ls3e:  addl $8,%eax
.Ls4e:  addl $8,%eax
.Ls5e:  addl $8,%eax
.Ls6e:  addl $8,%eax
.Ls7e:  addl $8,%eax
.Ls8e:  addl $8,%eax
        addq %rbx,%rdi                  /* +64 */
        subq %rax,%rdi                  /* correct destination with computed offset */

        shlq $6,%rdx                    /* loop counter * 64 (stride length) */
        addq %rax,%rdx                  /* add offset to loopcnt */
        andl $63,%ecx                   /* remaining bytes */
        addq %rcx,%rdx                  /* add them */
        jmp .Lzero_rest

        /* exception on quad word loop in tail handling */
        /* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
        shll $3,%ecx
        andl $7,%edx
        addl %ecx,%edx
        /* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
        cmpl $0,(%rsp)                  /* zero flag set? */
        jz .Le_zero
        movq %rdx,%rcx
.Le_byte:
        xorl %eax,%eax
.Le5:   rep
        stosb
        /* when there is another exception while zeroing the rest just return */
.Le_zero:
        movq %rdx,%rax
        jmp .Lende
        CFI_ENDPROC
ENDPROC(__copy_user_nocache)