/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers are
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT	0x80000000
#define __AUDIT_ARCH_LE		0x40000000

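/*
 * For reference: EM_X86_64 is 62 (0x3e), so AUDIT_ARCH_X86_64 expands to
 * 0x80000000|0x40000000|0x3e = 0xc000003e, the same value userspace sees
 * as AUDIT_ARCH_X86_64 in <linux/audit.h>.
 */
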
	.code64

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi		/* ip of the call site in the traced function */
	movq 8(%rbp), %rsi		/* parent ip, from the traced function's frame */
	subq $MCOUNT_INSN_SIZE, %rdi	/* back up to the mcount call itself */

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

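/*
 * Background note for the macros above: the SYSCALL instruction saves the
 * user RIP in %rcx and RFLAGS in %r11 and pushes nothing, so the SS, RSP,
 * CS, RCX and EFLAGS slots of pt_regs contain garbage until
 * FIXUP_TOP_OF_STACK fills them in from pda_oldrsp, the flat user
 * selectors and the saved %r11 value.
 */
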
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	$__KERNEL_DS	/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 * stack frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frames, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
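/*
 * For illustration only (not part of the kernel): a user-space
 * write(1, buf, 14) following the convention above would look like
 *
 *	movl	$1,%eax			# __NR_write (1 on x86-64)
 *	movl	$1,%edi			# arg0: fd
 *	leaq	buf(%rip),%rsi		# arg1: buffer ("buf" assumed defined)
 *	movl	$14,%edx		# arg2: count
 *	syscall				# rcx/r11 are clobbered by the CPU
 */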

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because the user could have changed the frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0			/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq %rax,%rsi		/* second arg, syscall return value */
	cmpq $0,%rax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%edi		/* zero-extend that into %edi */
	inc %edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
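/*
 * Worked example for sysret_audit above: a syscall returning -EPERM leaves
 * %rax negative, so setl sets %al to 1 and "inc %edi" yields 2
 * (AUDITSC_FAILURE); a zero or positive return value yields %edi = 1
 * (AUDITSC_SUCCESS).
 */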

	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx			/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because the user could have changed the frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  int_careful
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp  retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Build the entry stubs and pointer table with some assembler magic.
 * We pack 7 stubs into a single 32-byte chunk, which will fit in a
 * single cache line on all modern x86 implementations.
 */
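/*
 * Size math (a sketch, assuming 2-byte "push imm8" and 2-byte short jmps):
 * six push+jmp stubs at 4 bytes each, one final push that falls through at
 * 2 bytes, plus a 5-byte near "jmp common_interrupt" come to 31 bytes, so
 * 7 stubs fit in one 32-byte chunk.
 */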
	.section .init.rodata,"a"
ENTRY(interrupt)
	.text
	.p2align 5
	.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
	INTR_FRAME
vector=FIRST_EXTERNAL_VECTOR
.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
	.balign 32
	.rept 7
	.if vector < NR_VECTORS
	.if vector <> FIRST_EXTERNAL_VECTOR
	CFI_ADJUST_CFA_OFFSET -8
	.endif
1:	pushq $(~vector+0x80)	/* Note: always in signed byte range */
	CFI_ADJUST_CFA_OFFSET 8
	.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
	jmp 2f
	.endif
	.previous
	.quad 1b
	.text
vector=vector+1
	.endif
	.endr
2:	jmp common_interrupt
.endr
	CFI_ENDPROC
END(irq_entries_start)

.previous
END(interrupt)
.previous

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the
 * fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): ~(interrupt number) */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
	pushq %rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq %rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl %gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm

	/*
	 * The interrupt stubs push (~vector+0x80) onto the stack and
	 * then jump to common_interrupt.
	 */
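	/*
	 * Worked example of the encoding: for vector 0x20 the stub pushes
	 * ~0x20+0x80 = 0x5f, which fits in a signed byte and keeps the push
	 * at 2 bytes; the addq $-0x80 below turns it back into ~0x20 = -33,
	 * from which the handler recovers the vector as ~(-33) = 0x20.
	 */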
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	XCPT_FRAME
	addq $-0x80,(%rsp)		/* Adjust vector to [-256,-1] range */
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt  $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz   retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt   $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	SWAPGS
	xorl %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
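/*
 * A minimal usage sketch from C (illustrative only; "worker" and "cookie"
 * are assumed names). Per child_rip below, fn's return value is passed
 * to do_exit() when it returns:
 *
 *	static int worker(void *cookie) { ... return 0; }
 *	pid_t pid = kernel_thread(worker, cookie, CLONE_FS | CLONE_FILES);
 */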
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning, this avoids the need
	 * of hacks for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry do_device_not_available
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)

/*
 * A note on the "critical region" in our callback handler.
 * We want to avoid stacking callback handlers due to events occurring
 * during handling of the last event. To do this, we keep events disabled
 * until we've done all processing. HOWEVER, we must enable events before
 * popping the stack frame (can't be done atomically) and so it would still
 * be possible to get enough handler activations to overflow the stack.
 * Although unlikely, bugs of that kind are hard to track down, so we'd
 * like to avoid the possibility.
 * So, on entry to the handler we detect whether we interrupted an
 * existing activation in its critical region -- if so, we pop the current
 * activation and restart the handler using the previous one.
 */
ENTRY(xen_do_hypervisor_callback)	# do_hypervisor_callback(struct *pt_regs)
	CFI_STARTPROC
/*
 * Since we don't modify %rdi, xen_evtchn_do_upcall(struct *pt_regs) will
 * see the correct pointer to the pt_regs
 */
	movq %rdi, %rsp		# we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp  error_exit
	CFI_ENDPROC
END(xen_do_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we do not need to fix up as Xen has already reloaded all segment
 * registers that could be reloaded and zeroed the others.
 * Category 2 we fix up by killing the current process. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by comparing each saved segment register
 * with its current contents: any discrepancy means we are in category 1.
 */
ENTRY(xen_failsafe_callback)
	framesz = (RIP-0x30)	/* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

#endif /* CONFIG_XEN */