Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
54ad726c IM |
2 | * Compatibility mode system call entry point for x86-64. |
3 | * | |
1da177e4 | 4 | * Copyright 2000-2002 Andi Kleen, SuSE Labs. |
54ad726c | 5 | */ |
d36f9479 | 6 | #include "calling.h" |
e2d5df93 | 7 | #include <asm/asm-offsets.h> |
1da177e4 LT |
8 | #include <asm/current.h> |
9 | #include <asm/errno.h> | |
54ad726c IM |
10 | #include <asm/ia32_unistd.h> |
11 | #include <asm/thread_info.h> | |
1da177e4 | 12 | #include <asm/segment.h> |
2601e64d | 13 | #include <asm/irqflags.h> |
1ce6f868 | 14 | #include <asm/asm.h> |
63bcff2a | 15 | #include <asm/smap.h> |
1da177e4 | 16 | #include <linux/linkage.h> |
d7e7528b | 17 | #include <linux/err.h> |
1da177e4 | 18 | |
ea714547 JO |
19 | .section .entry.text, "ax" |
20 | ||
2be29982 JF |
21 | #ifdef CONFIG_PARAVIRT |
22 | ENTRY(native_usergs_sysret32) /* bare-metal swapgs + sysretl pair, assembled only when CONFIG_PARAVIRT is set */ | |
23 | swapgs /* exchange the GS base so user code gets its own GS back */ | |
24 | sysretl /* 32-bit operand-size SYSRET: leave the kernel for the compat-mode caller */ | |
25 | ENDPROC(native_usergs_sysret32) | |
2be29982 JF |
26 | #endif |
27 | ||
1da177e4 | 28 | /* |
54ad726c | 29 | * 32-bit SYSENTER instruction entry. |
1da177e4 | 30 | * |
b87cf63e DV |
31 | * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. |
32 | * IF and VM in rflags are cleared (IOW: interrupts are off). | |
33 | * SYSENTER does not save anything on the stack, | |
34 | * and does not save old rip (!!!) and rflags. | |
35 | * | |
1da177e4 | 36 | * Arguments: |
b87cf63e DV |
37 | * eax system call number |
38 | * ebx arg1 | |
39 | * ecx arg2 | |
40 | * edx arg3 | |
41 | * esi arg4 | |
42 | * edi arg5 | |
43 | * ebp user stack | |
44 | * 0(%ebp) arg6 | |
45 | * | |
1da177e4 | 46 | * This is purely a fast path. For anything complicated we use the int 0x80 |
b87cf63e | 47 | * path below. We set up a complete hardware stack frame to share code |
1da177e4 | 48 | * with the int 0x80 path. |
b87cf63e | 49 | */ |
4c8cd0c5 | 50 | ENTRY(entry_SYSENTER_compat) |
b611acf4 | 51 | /* Interrupts are off on entry. In its current form this routine builds the pt_regs frame, sanitizes NT and then terminates the task via do_exit. */ |
a232e3d5 | 52 | SWAPGS_UNSAFE_STACK /* presumably switches to the kernel GS base so PER_CPU_VAR works - macro is defined elsewhere, confirm */ |
3a23208e | 53 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* rsp = per-CPU cpu_current_top_of_stack */ |
a232e3d5 | 54 | |
4ee8ec17 DV |
55 | /* Zero-extending 32-bit regs, do not remove */ |
56 | movl %ebp, %ebp /* ebp = user stack pointer (see the Arguments list above) */ | |
57 | movl %eax, %eax /* eax = system call number */ | |
58 | ||
4c9c0e91 | 59 | movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d /* r10d = thread_info->sysenter_return, pushed below as pt_regs->ip */ |
4c9c0e91 DV |
60 | |
61 | /* Construct struct pt_regs on stack */ | |
131484c8 IM |
62 | pushq $__USER32_DS /* pt_regs->ss */ |
63 | pushq %rbp /* pt_regs->sp */ | |
b611acf4 AL |
64 | |
65 | /* | |
66 | * Push flags. This is nasty. First, interrupts are currently | |
67 | * off, but we need pt_regs->flags to have IF set. Second, even | |
68 | * if TF was set when SYSENTER started, it's clear by now. We fix | |
69 | * that later using TIF_SINGLESTEP. | |
70 | */ | |
71 | pushfq /* pt_regs->flags (except IF = 0) */ | |
72 | orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ | |
73 | ||
131484c8 | 74 | pushq $__USER32_CS /* pt_regs->cs */ |
54ad726c | 75 | pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ |
131484c8 IM |
76 | pushq %rax /* pt_regs->orig_ax */ |
77 | pushq %rdi /* pt_regs->di */ | |
78 | pushq %rsi /* pt_regs->si */ | |
79 | pushq %rdx /* pt_regs->dx */ | |
80 | pushq %rcx /* pt_regs->cx */ | |
81 | pushq $-ENOSYS /* pt_regs->ax */ | |
1da177e4 | 82 | cld /* clear DF explicitly: SYSENTER does not filter flags (see the NT note below) */ |
54ad726c | 83 | sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ |
4c9c0e91 | 84 | |
8c7aa698 AL |
85 | /* |
86 | * Sysenter doesn't filter flags, so we need to clear NT | |
87 | * ourselves. To save a few cycles, we can check whether | |
88 | * NT was set instead of doing an unconditional popfq. | |
b611acf4 AL |
89 | * This needs to happen before enabling interrupts so that |
90 | * we don't get preempted with NT set. | |
8c7aa698 | 91 | */ |
54ad726c IM |
92 | testl $X86_EFLAGS_NT, EFLAGS(%rsp) |
93 | jnz sysenter_fix_flags /* rare case handled out of line, it jumps back to sysenter_flags_fixed */ | |
8c7aa698 AL |
94 | sysenter_flags_fixed: |
95 | ||
e62a254a AL |
96 | /* Temporary: SYSENTER is disabled. Every entry ends in the do_exit call below instead of dispatching a system call. */ |
97 | #ifdef CONFIG_CONTEXT_TRACKING | |
98 | call enter_from_user_mode | |
99 | #endif | |
100 | ENABLE_INTERRUPTS(CLBR_NONE) | |
101 | movl $11, %edi /* first C argument for do_exit - 11 is presumably SIGSEGV, TODO confirm */ | |
102 | call do_exit | |
103 | ||
2ec67971 AL |
104 | /* Unreachable. */ |
105 | ud2 /* trap if do_exit ever returns */ | |
1da177e4 | 106 | |
8c7aa698 | 107 | sysenter_fix_flags: |
b611acf4 | 108 | pushq $X86_EFLAGS_FIXED /* new rflags image holding only X86_EFLAGS_FIXED, so NT is clear */ |
131484c8 | 109 | popfq /* load that image into rflags */ |
54ad726c | 110 | jmp sysenter_flags_fixed |
4c8cd0c5 | 111 | ENDPROC(entry_SYSENTER_compat) |
1da177e4 LT |
112 | |
113 | /* | |
54ad726c | 114 | * 32-bit SYSCALL instruction entry. |
1da177e4 | 115 | * |
54ad726c | 116 | * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, |
b87cf63e DV |
117 | * then loads new ss, cs, and rip from previously programmed MSRs. |
118 | * rflags gets masked by a value from another MSR (so CLD and CLAC | |
119 | * are not needed). SYSCALL does not save anything on the stack | |
120 | * and does not change rsp. | |
121 | * | |
122 | * Note: rflags saving+masking-with-MSR happens only in Long mode | |
54ad726c | 123 | * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it). |
b87cf63e DV |
124 | * Don't get confused: rflags saving+masking depends on Long Mode Active bit |
125 | * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes | |
126 | * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). | |
127 | * | |
1da177e4 | 128 | * Arguments: |
b87cf63e DV |
129 | * eax system call number |
130 | * ecx return address | |
131 | * ebx arg1 | |
132 | * ebp arg2 (note: not saved in the stack frame, should not be touched) | |
133 | * edx arg3 | |
134 | * esi arg4 | |
135 | * edi arg5 | |
136 | * esp user stack | |
137 | * 0(%esp) arg6 | |
138 | * | |
1da177e4 | 139 | * This is purely a fast path. For anything complicated we use the int 0x80 |
b87cf63e DV |
140 | * path below. We set up a complete hardware stack frame to share code |
141 | * with the int 0x80 path. | |
142 | */ | |
2cd23553 | 143 | ENTRY(entry_SYSCALL_compat) |
a232e3d5 DV |
144 | /* |
145 | * Interrupts are off on entry. | |
146 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | |
147 | * it is too small to ever cause noticeable irq latency. | |
148 | */ | |
457da70e | 149 | SWAPGS_UNSAFE_STACK /* rsp still holds the user stack pointer here, since SYSCALL does not change rsp */ |
e62a254a AL |
150 | |
151 | /* Temporary: SYSCALL32 is disabled. The two instructions below hand -ENOSYS straight back to the caller. */ | |
152 | movl $-ENOSYS, %eax /* result reported for every 32-bit SYSCALL */ | |
153 | USERGS_SYSRET32 /* NOTE(review): if this macro returns to user mode as its name suggests, the rest of this routine is dead code - confirm */ | |
154 | ||
54ad726c IM |
155 | movl %esp, %r8d /* keep the user stack pointer, pushed below as pt_regs->sp */ |
156 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* rsp = per-CPU cpu_current_top_of_stack */ | |
66804154 | 157 | ENABLE_INTERRUPTS(CLBR_NONE) |
a232e3d5 | 158 | |
4ee8ec17 | 159 | /* Zero-extending 32-bit regs, do not remove */ |
54ad726c | 160 | movl %eax, %eax /* eax = system call number */ |
4ee8ec17 | 161 | |
4c9c0e91 | 162 | /* Construct struct pt_regs on stack */ |
131484c8 IM |
163 | pushq $__USER32_DS /* pt_regs->ss */ |
164 | pushq %r8 /* pt_regs->sp */ | |
165 | pushq %r11 /* pt_regs->flags */ | |
166 | pushq $__USER32_CS /* pt_regs->cs */ | |
167 | pushq %rcx /* pt_regs->ip */ | |
168 | pushq %rax /* pt_regs->orig_ax */ | |
169 | pushq %rdi /* pt_regs->di */ | |
170 | pushq %rsi /* pt_regs->si */ | |
171 | pushq %rdx /* pt_regs->dx */ | |
172 | pushq %rbp /* pt_regs->cx */ | |
54ad726c | 173 | movl %ebp, %ecx /* arg2 arrives in ebp because SYSCALL put the return address in ecx, so copy it into ecx */ |
131484c8 | 174 | pushq $-ENOSYS /* pt_regs->ax */ |
54ad726c | 175 | sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ |
4c9c0e91 | 176 | |
2ec67971 AL |
177 | /* Unreachable. */ |
178 | ud2 | |
2cd23553 | 179 | END(entry_SYSCALL_compat) |
54ad726c | 180 | |
b87cf63e DV |
181 | /* |
182 | * Emulated IA32 system calls via int 0x80. | |
1da177e4 | 183 | * |
b87cf63e DV |
184 | * Arguments: |
185 | * eax system call number | |
186 | * ebx arg1 | |
187 | * ecx arg2 | |
188 | * edx arg3 | |
189 | * esi arg4 | |
190 | * edi arg5 | |
191 | * ebp arg6 (note: not saved in the stack frame, should not be touched) | |
1da177e4 LT |
192 | * |
193 | * Notes: | |
b87cf63e DV |
194 | * Uses the same stack frame as the x86-64 version. |
195 | * All registers except eax must be saved (but ptrace may violate that). | |
1da177e4 LT |
196 | * Arguments are zero extended. For system calls that want sign extension and |
197 | * take long arguments a wrapper is needed. Most calls can just be called | |
198 | * directly. | |
b87cf63e DV |
199 | * Assumes it is only called from user space and entered with interrupts off. |
200 | */ | |
1da177e4 | 201 | |
2cd23553 | 202 | ENTRY(entry_INT80_compat) |
2601e64d | 203 | /* |
a232e3d5 | 204 | * Interrupts are off on entry. This routine completes pt_regs on top of the iret frame, calls do_int80_syscall_32 and leaves through restore_regs_and_iret. |
2601e64d | 205 | */ |
a232e3d5 DV |
206 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
207 | SWAPGS /* paired with the SWAPGS on the exit path at the bottom of this routine */ | |
a232e3d5 | 208 | |
ee08c6bd AL |
209 | /* |
210 | * User tracing code (ptrace or signal handlers) might assume that | |
211 | * the saved RAX contains a 32-bit number when we're invoking a 32-bit | |
212 | * syscall. Just in case the high bits are nonzero, zero-extend | |
213 | * the syscall number. (This could almost certainly be deleted | |
214 | * with no ill effects.) | |
215 | */ | |
54ad726c | 216 | movl %eax, %eax |
4ee8ec17 | 217 | |
4c9c0e91 | 218 | /* Construct struct pt_regs on stack (iret frame is already on stack) */ |
131484c8 IM |
219 | pushq %rax /* pt_regs->orig_ax */ |
220 | pushq %rdi /* pt_regs->di */ | |
221 | pushq %rsi /* pt_regs->si */ | |
222 | pushq %rdx /* pt_regs->dx */ | |
223 | pushq %rcx /* pt_regs->cx */ | |
224 | pushq $-ENOSYS /* pt_regs->ax */ | |
8169aff6 AL |
225 | xorq %r8,%r8 /* r8 = 0, the value stored into the four slots below */ |
226 | pushq %r8 /* pt_regs->r8 = 0 */ | |
227 | pushq %r8 /* pt_regs->r9 = 0 */ | |
228 | pushq %r8 /* pt_regs->r10 = 0 */ | |
229 | pushq %r8 /* pt_regs->r11 = 0 */ | |
230 | pushq %rbx /* pt_regs->rbx */ | |
231 | pushq %rbp /* pt_regs->rbp */ | |
232 | pushq %r12 /* pt_regs->r12 */ | |
233 | pushq %r13 /* pt_regs->r13 */ | |
234 | pushq %r14 /* pt_regs->r14 */ | |
235 | pushq %r15 /* pt_regs->r15 */ | |
1da177e4 | 236 | cld |
54ad726c | 237 | |
73cbf687 | 238 | /* |
ee08c6bd AL |
239 | * User mode is traced as though IRQs are on, and the interrupt |
240 | * gate turned them off. | |
73cbf687 | 241 | */ |
ee08c6bd AL |
242 | TRACE_IRQS_OFF |
243 | ||
244 | movq %rsp, %rdi /* rdi = address of the pt_regs frame just built, passed as the first argument */ | |
245 | call do_int80_syscall_32 | |
246 | ||
247 | /* Go back to user mode. */ | |
248 | TRACE_IRQS_ON | |
249 | SWAPGS /* undo the SWAPGS done on entry */ | |
250 | jmp restore_regs_and_iret /* shared exit label defined outside this listing */ | |
2cd23553 | 251 | END(entry_INT80_compat) |
1da177e4 | 252 | |
1d4b4b29 AV |
253 | ALIGN |
254 | GLOBAL(stub32_clone) | |
5cdc683b | 255 | /* |
7a5a9824 DV |
256 | * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). |
257 | * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). | |
258 | * | |
259 | * The native 64-bit kernel's sys_clone() implements the latter, | |
260 | * so we need to swap arguments here before calling it: | |
5cdc683b | 261 | */ |
7a5a9824 | 262 | xchg %r8, %rcx /* exchange the registers holding tls_val and child_tidptr (C argument slots 4 and 5) */ |
8169aff6 | 263 | jmp sys_clone /* tail call: a jmp rather than a call, so sys_clone returns to whoever called this stub */