x86/entry: Add C code for fast system call entries
[deliverable/linux.git] / arch / x86 / entry / entry_64_compat.S
CommitLineData
1da177e4 1/*
54ad726c
IM
2 * Compatibility mode system call entry point for x86-64.
3 *
1da177e4 4 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
54ad726c 5 */
d36f9479 6#include "calling.h"
e2d5df93 7#include <asm/asm-offsets.h>
1da177e4
LT
8#include <asm/current.h>
9#include <asm/errno.h>
54ad726c
IM
10#include <asm/ia32_unistd.h>
11#include <asm/thread_info.h>
1da177e4 12#include <asm/segment.h>
2601e64d 13#include <asm/irqflags.h>
1ce6f868 14#include <asm/asm.h>
63bcff2a 15#include <asm/smap.h>
1da177e4 16#include <linux/linkage.h>
d7e7528b 17#include <linux/err.h>
1da177e4 18
ea714547
JO
19 .section .entry.text, "ax"
20
2be29982
JF
#ifdef CONFIG_PARAVIRT
/*
 * native_usergs_sysret32 - return to 32-bit user mode via SYSRETL.
 *
 * Executes SWAPGS followed immediately by SYSRETL.  Only assembled when
 * CONFIG_PARAVIRT is enabled.
 *
 * NOTE(review): presumably the bare-metal implementation behind the
 * USERGS_SYSRET32 paravirt hook -- confirm against the paravirt headers.
 */
ENTRY(native_usergs_sysret32)
	swapgs
	sysretl
ENDPROC(native_usergs_sysret32)
#endif
27
/*
 * 32-bit SYSENTER instruction entry.
 *
 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
 * IF and VM in rflags are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old rip (!!!) and rflags.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 *
 * This is purely a fast path. For anything complicated we use the int 0x80
 * path below. We set up a complete hardware stack frame to share code
 * with the int 0x80 path.
 *
 * Current state: the SYSENTER path is temporarily disabled.  After the
 * pt_regs frame is built and NT is cleared, the code enables interrupts
 * and calls do_exit(11) instead of dispatching a system call.
 */
ENTRY(entry_SYSENTER_compat)
	/* Interrupts are off on entry. */
	SWAPGS_UNSAFE_STACK
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/* Zero-extending 32-bit regs, do not remove */
	movl	%ebp, %ebp
	movl	%eax, %eax

	/*
	 * r10d = thread_info->sysenter_return; it is pushed as pt_regs->ip
	 * below.  The load is addressed off %rsp, so it is done before any
	 * push moves %rsp.
	 */
	movl	ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%rbp			/* pt_regs->sp */

	/*
	 * Push flags.  This is nasty.  First, interrupts are currently
	 * off, but we need pt_regs->flags to have IF set.  Second, even
	 * if TF was set when SYSENTER started, it's clear by now.  We fix
	 * that later using TIF_SINGLESTEP.
	 */
	pushfq				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */

	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	%r10			/* pt_regs->ip = thread_info->sysenter_return */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	cld
	sub	$(10*8), %rsp		/* pt_regs->r8-11, bp, bx, r12-15 not saved */

	/*
	 * Sysenter doesn't filter flags, so we need to clear NT
	 * ourselves.  To save a few cycles, we can check whether
	 * NT was set instead of doing an unconditional popfq.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 */
	testl	$X86_EFLAGS_NT, EFLAGS(%rsp)
	jnz	sysenter_fix_flags
sysenter_flags_fixed:

	/* Temporary: SYSENTER is disabled. */
#ifdef CONFIG_CONTEXT_TRACKING
	call	enter_from_user_mode
#endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	/* First C argument for do_exit(); 11 is presumably SIGSEGV -- confirm. */
	movl	$11, %edi
	call	do_exit

	/* Unreachable. */
	ud2

	/*
	 * Out-of-line slow path: reload rflags with only the fixed bit set,
	 * which clears NT, then rejoin the main path.
	 */
sysenter_fix_flags:
	pushq	$X86_EFLAGS_FIXED
	popfq
	jmp	sysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)
1da177e4
LT
112
/*
 * 32-bit SYSCALL instruction entry.
 *
 * 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
 * then loads new ss, cs, and rip from previously programmed MSRs.
 * rflags gets masked by a value from another MSR (so CLD and CLAC
 * are not needed). SYSCALL does not save anything on the stack
 * and does not change rsp.
 *
 * Note: rflags saving+masking-with-MSR happens only in Long mode
 * (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
 *
 * Arguments:
 * eax  system call number
 * ecx  return address
 * ebx  arg1
 * ebp  arg2	(note: not saved in the stack frame, should not be touched)
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * esp  user stack
 * 0(%esp) arg6
 *
 * This is purely a fast path. For anything complicated we use the int 0x80
 * path below. We set up a complete hardware stack frame to share code
 * with the int 0x80 path.
 *
 * Current state: SYSCALL32 is temporarily disabled.  The entry point loads
 * -ENOSYS into eax and returns through USERGS_SYSRET32 straight away.
 */
ENTRY(entry_SYSCALL_compat)
	/*
	 * Interrupts are off on entry.
	 * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
	 * it is too small to ever cause noticeable irq latency.
	 */
	SWAPGS_UNSAFE_STACK

	/* Temporary: SYSCALL32 is disabled. */
	movl	$-ENOSYS, %eax
	USERGS_SYSRET32

	/*
	 * NOTE(review): everything from here to the ud2 appears to be dead
	 * code while the early return above is in place (assuming
	 * USERGS_SYSRET32 never falls through -- confirm).  It is the frame
	 * setup for the real SYSCALL32 path, kept for when it is re-enabled.
	 */
	movl	%esp, %r8d		/* r8d = user stack pointer, zero-extended */
	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
	ENABLE_INTERRUPTS(CLBR_NONE)

	/* Zero-extending 32-bit regs, do not remove */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%r8			/* pt_regs->sp */
	pushq	%r11			/* pt_regs->flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	%rcx			/* pt_regs->ip */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	/* arg2 arrives in ebp because SYSCALL put the return address in ecx. */
	pushq	%rbp			/* pt_regs->cx */
	movl	%ebp, %ecx
	pushq	$-ENOSYS		/* pt_regs->ax */
	sub	$(10*8), %rsp		/* pt_regs->r8-11, bp, bx, r12-15 not saved */

	/* Unreachable. */
	ud2
END(entry_SYSCALL_compat)
54ad726c 180
b87cf63e
DV
/*
 * Emulated IA32 system calls via int 0x80.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6	(note: not saved in the stack frame, should not be touched)
 *
 * Notes:
 * Uses the same stack frame as the x86-64 version.
 * All registers except eax must be saved (but ptrace may violate that).
 * Arguments are zero extended. For system calls that want sign extension and
 * take long arguments a wrapper is needed. Most calls can just be called
 * directly.
 * Assumes it is only called from user space and entered with interrupts off.
 */

ENTRY(entry_INT80_compat)
	/*
	 * Interrupts are off on entry.
	 */
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	SWAPGS

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack (iret frame is already on stack) */
	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
	pushq	%rdx			/* pt_regs->dx */
	pushq	%rcx			/* pt_regs->cx */
	pushq	$-ENOSYS		/* pt_regs->ax */
	/* r8-r11 are stored as zero rather than with their incoming values. */
	xorq	%r8, %r8
	pushq	%r8			/* pt_regs->r8  = 0 */
	pushq	%r8			/* pt_regs->r9  = 0 */
	pushq	%r8			/* pt_regs->r10 = 0 */
	pushq	%r8			/* pt_regs->r11 = 0 */
	pushq	%rbx			/* pt_regs->rbx */
	pushq	%rbp			/* pt_regs->rbp */
	pushq	%r12			/* pt_regs->r12 */
	pushq	%r13			/* pt_regs->r13 */
	pushq	%r14			/* pt_regs->r14 */
	pushq	%r15			/* pt_regs->r15 */
	cld

	/*
	 * User mode is traced as though IRQs are on, and the interrupt
	 * gate turned them off.
	 */
	TRACE_IRQS_OFF

	/* rdi = address of the pt_regs frame just built (first C argument). */
	movq	%rsp, %rdi
	call	do_int80_syscall_32

	/* Go back to user mode. */
	TRACE_IRQS_ON
	SWAPGS
	jmp	restore_regs_and_iret
END(entry_INT80_compat)
1da177e4 252
1d4b4b29
AV
253 ALIGN
254GLOBAL(stub32_clone)
5cdc683b 255 /*
7a5a9824
DV
256 * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
257 * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
258 *
259 * The native 64-bit kernel's sys_clone() implements the latter,
260 * so we need to swap arguments here before calling it:
5cdc683b 261 */
7a5a9824 262 xchg %r8, %rcx
8169aff6 263 jmp sys_clone
This page took 7.04789 seconds and 5 git commands to generate.