ftrace: use dynamic patching for updating mcount calls
arch/x86/kernel/entry_64.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54
55 .code64
56
57 #ifdef CONFIG_FTRACE
58 #ifdef CONFIG_DYNAMIC_FTRACE
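/*
 * Rough sketch of the dynamic-patching scheme this stub is part of
 * (hedged; the authoritative flow lives in the ftrace core, not here):
 * with -pg the compiler starts every traced function with
 *
 *	some_func:
 *		call mcount		# 5-byte near call
 *		...
 *
 * The mcount_call and ftrace_call labels below mark call instructions
 * whose targets ftrace rewrites at run time, and the recorded
 * per-function call sites are likewise patched between a nop (tracing
 * off) and a call into the tracer (tracing on), so this stub only pays
 * the full save/restore cost while a call site is still live.
 */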
59 ENTRY(mcount)
60
61 subq $0x38, %rsp
62 movq %rax, (%rsp)
63 movq %rcx, 8(%rsp)
64 movq %rdx, 16(%rsp)
65 movq %rsi, 24(%rsp)
66 movq %rdi, 32(%rsp)
67 movq %r8, 40(%rsp)
68 movq %r9, 48(%rsp)
69
70 movq 0x38(%rsp), %rdi
71
72 .globl mcount_call
73 mcount_call:
74 call ftrace_stub
75
76 movq 48(%rsp), %r9
77 movq 40(%rsp), %r8
78 movq 32(%rsp), %rdi
79 movq 24(%rsp), %rsi
80 movq 16(%rsp), %rdx
81 movq 8(%rsp), %rcx
82 movq (%rsp), %rax
83 addq $0x38, %rsp
84
85 retq
86 END(mcount)
87
88 ENTRY(ftrace_caller)
89
90 /* taken from glibc */
91 subq $0x38, %rsp
92 movq %rax, (%rsp)
93 movq %rcx, 8(%rsp)
94 movq %rdx, 16(%rsp)
95 movq %rsi, 24(%rsp)
96 movq %rdi, 32(%rsp)
97 movq %r8, 40(%rsp)
98 movq %r9, 48(%rsp)
99
100 movq 0x38(%rsp), %rdi
101 movq 8(%rbp), %rsi
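/*
 * The two loads above are meant to line up with the tracer callback,
 * which in this ftrace generation is roughly
 *
 *	void tracer(unsigned long ip, unsigned long parent_ip);
 *
 * 0x38(%rsp) is the return address of the patched call site, i.e. an
 * address inside the traced function, and 8(%rbp) is the traced
 * function's own return address (this assumes the traced function set
 * up a frame pointer, as ftrace requires).  The prototype is an
 * assumption for illustration; it is not defined in this file.
 */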
102
103 .globl ftrace_call
104 ftrace_call:
105 call ftrace_stub
106
107 movq 48(%rsp), %r9
108 movq 40(%rsp), %r8
109 movq 32(%rsp), %rdi
110 movq 24(%rsp), %rsi
111 movq 16(%rsp), %rdx
112 movq 8(%rsp), %rcx
113 movq (%rsp), %rax
114 addq $0x38, %rsp
115
116 .globl ftrace_stub
117 ftrace_stub:
118 retq
119 END(ftrace_caller)
120
121 #else /* ! CONFIG_DYNAMIC_FTRACE */
122 ENTRY(mcount)
123 cmpq $ftrace_stub, ftrace_trace_function
124 jnz trace
125 .globl ftrace_stub
126 ftrace_stub:
127 retq
128
129 trace:
130 /* taken from glibc */
131 subq $0x38, %rsp
132 movq %rax, (%rsp)
133 movq %rcx, 8(%rsp)
134 movq %rdx, 16(%rsp)
135 movq %rsi, 24(%rsp)
136 movq %rdi, 32(%rsp)
137 movq %r8, 40(%rsp)
138 movq %r9, 48(%rsp)
139
140 movq 0x38(%rsp), %rdi
141 movq 8(%rbp), %rsi
142
143 call *ftrace_trace_function
144
145 movq 48(%rsp), %r9
146 movq 40(%rsp), %r8
147 movq 32(%rsp), %rdi
148 movq 24(%rsp), %rsi
149 movq 16(%rsp), %rdx
150 movq 8(%rsp), %rcx
151 movq (%rsp), %rax
152 addq $0x38, %rsp
153
154 jmp ftrace_stub
155 END(mcount)
156 #endif /* CONFIG_DYNAMIC_FTRACE */
157 #endif /* CONFIG_FTRACE */
158
159 #ifndef CONFIG_PREEMPT
160 #define retint_kernel retint_restore_args
161 #endif
162
163 #ifdef CONFIG_PARAVIRT
164 ENTRY(native_irq_enable_syscall_ret)
165 movq %gs:pda_oldrsp,%rsp
166 swapgs
167 sysretq
168 #endif /* CONFIG_PARAVIRT */
169
170
171 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
172 #ifdef CONFIG_TRACE_IRQFLAGS
173 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
174 jnc 1f
175 TRACE_IRQS_ON
176 1:
177 #endif
178 .endm
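/*
 * Bit 9 of the saved EFLAGS is IF; if the frame we are about to iret
 * through will re-enable interrupts, tell the irq-flags tracing code
 * first via TRACE_IRQS_ON so its bookkeeping matches the hardware
 * state after the iretq.
 */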
179
180 /*
181 * C code is not supposed to know about the undefined top of stack. Every time
182 * a C function with a pt_regs argument is called from the SYSCALL based
183 * fast path, FIXUP_TOP_OF_STACK is needed.
184 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
185 * manipulation.
186 */
187
188 /* %rsp:at FRAMEEND */
189 .macro FIXUP_TOP_OF_STACK tmp
190 movq %gs:pda_oldrsp,\tmp
191 movq \tmp,RSP(%rsp)
192 movq $__USER_DS,SS(%rsp)
193 movq $__USER_CS,CS(%rsp)
194 movq $-1,RCX(%rsp)
195 movq R11(%rsp),\tmp /* get eflags */
196 movq \tmp,EFLAGS(%rsp)
197 .endm
198
199 .macro RESTORE_TOP_OF_STACK tmp,offset=0
200 movq RSP-\offset(%rsp),\tmp
201 movq \tmp,%gs:pda_oldrsp
202 movq EFLAGS-\offset(%rsp),\tmp
203 movq \tmp,R11-\offset(%rsp)
204 .endm
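/*
 * Concretely (reading the two macros above), FIXUP_TOP_OF_STACK fills
 * the slots SYSCALL never saved:
 *
 *	RSP    <- %gs:pda_oldrsp   (user stack pointer stashed at entry)
 *	SS     <- __USER_DS
 *	CS     <- __USER_CS
 *	EFLAGS <- value in the R11 slot (SYSCALL put rflags there)
 *	RCX    <- -1               (SYSCALL clobbered rcx with the RIP)
 *
 * and RESTORE_TOP_OF_STACK copies RSP and EFLAGS back so the SYSRET
 * fast path still works if nothing in the frame was changed.
 */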
205
206 .macro FAKE_STACK_FRAME child_rip
207 /* push in order ss, rsp, eflags, cs, rip */
208 xorl %eax, %eax
209 pushq %rax /* ss */
210 CFI_ADJUST_CFA_OFFSET 8
211 /*CFI_REL_OFFSET ss,0*/
212 pushq %rax /* rsp */
213 CFI_ADJUST_CFA_OFFSET 8
214 CFI_REL_OFFSET rsp,0
215 pushq $(1<<9) /* eflags - interrupts on */
216 CFI_ADJUST_CFA_OFFSET 8
217 /*CFI_REL_OFFSET rflags,0*/
218 pushq $__KERNEL_CS /* cs */
219 CFI_ADJUST_CFA_OFFSET 8
220 /*CFI_REL_OFFSET cs,0*/
221 pushq \child_rip /* rip */
222 CFI_ADJUST_CFA_OFFSET 8
223 CFI_REL_OFFSET rip,0
224 pushq %rax /* orig rax */
225 CFI_ADJUST_CFA_OFFSET 8
226 .endm
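/*
 * The frame FAKE_STACK_FRAME builds (high to low addresses) mimics
 * what an interrupt from kernel mode would have pushed:
 *
 *	SS = 0, RSP = 0, EFLAGS = IF set, CS = __KERNEL_CS,
 *	RIP = \child_rip, ORIG_RAX = 0
 *
 * UNFAKE_STACK_FRAME below simply pops those six words again.
 */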
227
228 .macro UNFAKE_STACK_FRAME
229 addq $8*6, %rsp
230 CFI_ADJUST_CFA_OFFSET -(6*8)
231 .endm
232
233 .macro CFI_DEFAULT_STACK start=1
234 .if \start
235 CFI_STARTPROC simple
236 CFI_SIGNAL_FRAME
237 CFI_DEF_CFA rsp,SS+8
238 .else
239 CFI_DEF_CFA_OFFSET SS+8
240 .endif
241 CFI_REL_OFFSET r15,R15
242 CFI_REL_OFFSET r14,R14
243 CFI_REL_OFFSET r13,R13
244 CFI_REL_OFFSET r12,R12
245 CFI_REL_OFFSET rbp,RBP
246 CFI_REL_OFFSET rbx,RBX
247 CFI_REL_OFFSET r11,R11
248 CFI_REL_OFFSET r10,R10
249 CFI_REL_OFFSET r9,R9
250 CFI_REL_OFFSET r8,R8
251 CFI_REL_OFFSET rax,RAX
252 CFI_REL_OFFSET rcx,RCX
253 CFI_REL_OFFSET rdx,RDX
254 CFI_REL_OFFSET rsi,RSI
255 CFI_REL_OFFSET rdi,RDI
256 CFI_REL_OFFSET rip,RIP
257 /*CFI_REL_OFFSET cs,CS*/
258 /*CFI_REL_OFFSET rflags,EFLAGS*/
259 CFI_REL_OFFSET rsp,RSP
260 /*CFI_REL_OFFSET ss,SS*/
261 .endm
262 /*
263 * A newly forked process directly context switches into this.
264 */
265 /* rdi: prev */
266 ENTRY(ret_from_fork)
267 CFI_DEFAULT_STACK
268 push kernel_eflags(%rip)
269 CFI_ADJUST_CFA_OFFSET 8
270 popf # reset kernel eflags
271 CFI_ADJUST_CFA_OFFSET -8
272 call schedule_tail
273 GET_THREAD_INFO(%rcx)
274 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
275 jnz rff_trace
276 rff_action:
277 RESTORE_REST
278 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
279 je int_ret_from_sys_call
280 testl $_TIF_IA32,threadinfo_flags(%rcx)
281 jnz int_ret_from_sys_call
282 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
283 jmp ret_from_sys_call
284 rff_trace:
285 movq %rsp,%rdi
286 call syscall_trace_leave
287 GET_THREAD_INFO(%rcx)
288 jmp rff_action
289 CFI_ENDPROC
290 END(ret_from_fork)
291
292 /*
293 * System call entry. Up to 6 arguments in registers are supported.
294 *
295 * SYSCALL does not save anything on the stack and does not change the
296 * stack pointer.
297 */
298
299 /*
300 * Register setup:
301 * rax system call number
302 * rdi arg0
303 * rcx return address for syscall/sysret, C arg3
304 * rsi arg1
305 * rdx arg2
306 * r10 arg3 (--> moved to rcx for C)
307 * r8 arg4
308 * r9 arg5
309 * r11 eflags for syscall/sysret, temporary for C
310 * r12-r15,rbp,rbx saved by C code, not touched.
311 *
312 * Interrupts are off on entry.
313 * Only called from user space.
314 *
315 * XXX if we had a free scratch register we could save the RSP into the stack frame
316 * and report it properly in ps. Unfortunately we don't have one.
317 *
318 * When the user can change the frames, always force IRET. That is because
319 * it deals with non-canonical addresses better. SYSRET has trouble
320 * with them due to bugs in both AMD and Intel CPUs.
321 */
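/*
 * Illustration (not from this file): a write(fd, buf, count) system
 * call arrives here as
 *
 *	rax = __NR_write, rdi = fd, rsi = buf, rdx = count
 *
 * and a 4th argument would sit in %r10, which the code below moves
 * into %rcx before indexing sys_call_table, since the C ABI wants
 * arg3 in %rcx but SYSCALL already used %rcx for the return address.
 */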
322
323 ENTRY(system_call)
324 CFI_STARTPROC simple
325 CFI_SIGNAL_FRAME
326 CFI_DEF_CFA rsp,PDA_STACKOFFSET
327 CFI_REGISTER rip,rcx
328 /*CFI_REGISTER rflags,r11*/
329 SWAPGS_UNSAFE_STACK
330 /*
331 * A hypervisor implementation might want to use a label
332 * after the swapgs, so that it can do the swapgs
333 * for the guest and jump here on syscall.
334 */
335 ENTRY(system_call_after_swapgs)
336
337 movq %rsp,%gs:pda_oldrsp
338 movq %gs:pda_kernelstack,%rsp
339 /*
340 * No need to follow this irqs off/on section - it's straight
341 * and short:
342 */
343 ENABLE_INTERRUPTS(CLBR_NONE)
344 SAVE_ARGS 8,1
345 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
346 movq %rcx,RIP-ARGOFFSET(%rsp)
347 CFI_REL_OFFSET rip,RIP-ARGOFFSET
348 GET_THREAD_INFO(%rcx)
349 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
350 jnz tracesys
351 cmpq $__NR_syscall_max,%rax
352 ja badsys
353 movq %r10,%rcx
354 call *sys_call_table(,%rax,8) # XXX: rip relative
355 movq %rax,RAX-ARGOFFSET(%rsp)
356 /*
357 * Syscall return path ending with SYSRET (fast path)
358 * Has incomplete stack frame and undefined top of stack.
359 */
360 ret_from_sys_call:
361 movl $_TIF_ALLWORK_MASK,%edi
362 /* edi: flagmask */
363 sysret_check:
364 LOCKDEP_SYS_EXIT
365 GET_THREAD_INFO(%rcx)
366 DISABLE_INTERRUPTS(CLBR_NONE)
367 TRACE_IRQS_OFF
368 movl threadinfo_flags(%rcx),%edx
369 andl %edi,%edx
370 jnz sysret_careful
371 CFI_REMEMBER_STATE
372 /*
373 * sysretq will re-enable interrupts:
374 */
375 TRACE_IRQS_ON
376 movq RIP-ARGOFFSET(%rsp),%rcx
377 CFI_REGISTER rip,rcx
378 RESTORE_ARGS 0,-ARG_SKIP,1
379 /*CFI_REGISTER rflags,r11*/
380 ENABLE_INTERRUPTS_SYSCALL_RET
381
382 CFI_RESTORE_STATE
383 /* Handle reschedules */
384 /* edx: work, edi: workmask */
385 sysret_careful:
386 bt $TIF_NEED_RESCHED,%edx
387 jnc sysret_signal
388 TRACE_IRQS_ON
389 ENABLE_INTERRUPTS(CLBR_NONE)
390 pushq %rdi
391 CFI_ADJUST_CFA_OFFSET 8
392 call schedule
393 popq %rdi
394 CFI_ADJUST_CFA_OFFSET -8
395 jmp sysret_check
396
397 /* Handle a signal */
398 sysret_signal:
399 TRACE_IRQS_ON
400 ENABLE_INTERRUPTS(CLBR_NONE)
401 testl $_TIF_DO_NOTIFY_MASK,%edx
402 jz 1f
403
404 /* Really a signal */
405 /* edx: work flags (arg3) */
406 leaq do_notify_resume(%rip),%rax
407 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
408 xorl %esi,%esi # oldset -> arg2
409 call ptregscall_common
410 1: movl $_TIF_NEED_RESCHED,%edi
411 /* Use IRET because user could have changed frame. This
412 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
413 DISABLE_INTERRUPTS(CLBR_NONE)
414 TRACE_IRQS_OFF
415 jmp int_with_check
416
417 badsys:
418 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
419 jmp ret_from_sys_call
420
421 /* Do syscall tracing */
422 tracesys:
423 SAVE_REST
424 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
425 FIXUP_TOP_OF_STACK %rdi
426 movq %rsp,%rdi
427 call syscall_trace_enter
428 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
429 RESTORE_REST
430 cmpq $__NR_syscall_max,%rax
431 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
432 movq %r10,%rcx /* fixup for C */
433 call *sys_call_table(,%rax,8)
434 movq %rax,RAX-ARGOFFSET(%rsp)
435 /* Use IRET because user could have changed frame */
436
437 /*
438 * Syscall return path ending with IRET.
439 * Has correct top of stack, but partial stack frame.
440 */
441 .globl int_ret_from_sys_call
442 int_ret_from_sys_call:
443 DISABLE_INTERRUPTS(CLBR_NONE)
444 TRACE_IRQS_OFF
445 testl $3,CS-ARGOFFSET(%rsp)
446 je retint_restore_args
447 movl $_TIF_ALLWORK_MASK,%edi
448 /* edi: mask to check */
449 int_with_check:
450 LOCKDEP_SYS_EXIT_IRQ
451 GET_THREAD_INFO(%rcx)
452 movl threadinfo_flags(%rcx),%edx
453 andl %edi,%edx
454 jnz int_careful
455 andl $~TS_COMPAT,threadinfo_status(%rcx)
456 jmp retint_swapgs
457
458 /* Either reschedule or signal or syscall exit tracking needed. */
459 /* First do a reschedule test. */
460 /* edx: work, edi: workmask */
461 int_careful:
462 bt $TIF_NEED_RESCHED,%edx
463 jnc int_very_careful
464 TRACE_IRQS_ON
465 ENABLE_INTERRUPTS(CLBR_NONE)
466 pushq %rdi
467 CFI_ADJUST_CFA_OFFSET 8
468 call schedule
469 popq %rdi
470 CFI_ADJUST_CFA_OFFSET -8
471 DISABLE_INTERRUPTS(CLBR_NONE)
472 TRACE_IRQS_OFF
473 jmp int_with_check
474
475 /* handle signals and tracing -- both require a full stack frame */
476 int_very_careful:
477 TRACE_IRQS_ON
478 ENABLE_INTERRUPTS(CLBR_NONE)
479 SAVE_REST
480 /* Check for syscall exit trace */
481 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
482 jz int_signal
483 pushq %rdi
484 CFI_ADJUST_CFA_OFFSET 8
485 leaq 8(%rsp),%rdi # &ptregs -> arg1
486 call syscall_trace_leave
487 popq %rdi
488 CFI_ADJUST_CFA_OFFSET -8
489 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
490 jmp int_restore_rest
491
492 int_signal:
493 testl $_TIF_DO_NOTIFY_MASK,%edx
494 jz 1f
495 movq %rsp,%rdi # &ptregs -> arg1
496 xorl %esi,%esi # oldset -> arg2
497 call do_notify_resume
498 1: movl $_TIF_NEED_RESCHED,%edi
499 int_restore_rest:
500 RESTORE_REST
501 DISABLE_INTERRUPTS(CLBR_NONE)
502 TRACE_IRQS_OFF
503 jmp int_with_check
504 CFI_ENDPROC
505 END(system_call)
506
507 /*
508 * Certain special system calls need to save a full stack frame.
509 */
510
511 .macro PTREGSCALL label,func,arg
512 .globl \label
513 \label:
514 leaq \func(%rip),%rax
515 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
516 jmp ptregscall_common
517 END(\label)
518 .endm
519
520 CFI_STARTPROC
521
522 PTREGSCALL stub_clone, sys_clone, %r8
523 PTREGSCALL stub_fork, sys_fork, %rdi
524 PTREGSCALL stub_vfork, sys_vfork, %rdi
525 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
526 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
527 PTREGSCALL stub_iopl, sys_iopl, %rsi
528
529 ENTRY(ptregscall_common)
530 popq %r11
531 CFI_ADJUST_CFA_OFFSET -8
532 CFI_REGISTER rip, r11
533 SAVE_REST
534 movq %r11, %r15
535 CFI_REGISTER rip, r15
536 FIXUP_TOP_OF_STACK %r11
537 call *%rax
538 RESTORE_TOP_OF_STACK %r11
539 movq %r15, %r11
540 CFI_REGISTER rip, r11
541 RESTORE_REST
542 pushq %r11
543 CFI_ADJUST_CFA_OFFSET 8
544 CFI_REL_OFFSET rip, 0
545 ret
546 CFI_ENDPROC
547 END(ptregscall_common)
548
549 ENTRY(stub_execve)
550 CFI_STARTPROC
551 popq %r11
552 CFI_ADJUST_CFA_OFFSET -8
553 CFI_REGISTER rip, r11
554 SAVE_REST
555 FIXUP_TOP_OF_STACK %r11
556 movq %rsp, %rcx
557 call sys_execve
558 RESTORE_TOP_OF_STACK %r11
559 movq %rax,RAX(%rsp)
560 RESTORE_REST
561 jmp int_ret_from_sys_call
562 CFI_ENDPROC
563 END(stub_execve)
564
565 /*
566 * sigreturn is special because it needs to restore all registers on return.
567 * This cannot be done with SYSRET, so use the IRET return path instead.
568 */
569 ENTRY(stub_rt_sigreturn)
570 CFI_STARTPROC
571 addq $8, %rsp
572 CFI_ADJUST_CFA_OFFSET -8
573 SAVE_REST
574 movq %rsp,%rdi
575 FIXUP_TOP_OF_STACK %r11
576 call sys_rt_sigreturn
577 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
578 RESTORE_REST
579 jmp int_ret_from_sys_call
580 CFI_ENDPROC
581 END(stub_rt_sigreturn)
582
583 /*
584 * initial frame state for interrupts and exceptions
585 */
586 .macro _frame ref
587 CFI_STARTPROC simple
588 CFI_SIGNAL_FRAME
589 CFI_DEF_CFA rsp,SS+8-\ref
590 /*CFI_REL_OFFSET ss,SS-\ref*/
591 CFI_REL_OFFSET rsp,RSP-\ref
592 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
593 /*CFI_REL_OFFSET cs,CS-\ref*/
594 CFI_REL_OFFSET rip,RIP-\ref
595 .endm
596
597 /* initial frame state for interrupts (and exceptions without error code) */
598 #define INTR_FRAME _frame RIP
599 /* initial frame state for exceptions with error code (and interrupts with
600 vector already pushed) */
601 #define XCPT_FRAME _frame ORIG_RAX
602
603 /*
604 * Interrupt entry/exit.
605 *
606 * Interrupt entry points save only callee-clobbered registers in the fast path.
607 *
608 * Entry runs with interrupts off.
609 */
610
611 /* 0(%rsp): interrupt number */
612 .macro interrupt func
613 cld
614 SAVE_ARGS
615 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
616 pushq %rbp
617 CFI_ADJUST_CFA_OFFSET 8
618 CFI_REL_OFFSET rbp, 0
619 movq %rsp,%rbp
620 CFI_DEF_CFA_REGISTER rbp
621 testl $3,CS(%rdi)
622 je 1f
623 SWAPGS
624 /* irqcount is used to check if a CPU is already on an interrupt
625 stack or not. While this is essentially redundant with preempt_count
626 it is a little cheaper to use a separate counter in the PDA
627 (short of moving irq_enter into assembly, which would be too
628 much work) */
629 1: incl %gs:pda_irqcount
630 cmoveq %gs:pda_irqstackptr,%rsp
631 push %rbp # backlink for old unwinder
632 /*
633 * We entered an interrupt context - irqs are off:
634 */
635 TRACE_IRQS_OFF
636 call \func
637 .endm
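/*
 * Stack-switch idiom used above (assuming pda_irqcount starts at -1,
 * as the PDA setup code initializes it): the incl sets ZF only when
 * the count goes from -1 to 0, i.e. on the outermost interrupt, so the
 * cmoveq switches to the per-CPU interrupt stack exactly once; nested
 * interrupts keep running on the stack they arrived on.
 */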
638
639 ENTRY(common_interrupt)
640 XCPT_FRAME
641 interrupt do_IRQ
642 /* 0(%rsp): oldrsp-ARGOFFSET */
643 ret_from_intr:
644 DISABLE_INTERRUPTS(CLBR_NONE)
645 TRACE_IRQS_OFF
646 decl %gs:pda_irqcount
647 leaveq
648 CFI_DEF_CFA_REGISTER rsp
649 CFI_ADJUST_CFA_OFFSET -8
650 exit_intr:
651 GET_THREAD_INFO(%rcx)
652 testl $3,CS-ARGOFFSET(%rsp)
653 je retint_kernel
654
655 /* Interrupt came from user space */
656 /*
657 * Has a correct top of stack, but a partial stack frame
658 * %rcx: thread info. Interrupts off.
659 */
660 retint_with_reschedule:
661 movl $_TIF_WORK_MASK,%edi
662 retint_check:
663 LOCKDEP_SYS_EXIT_IRQ
664 movl threadinfo_flags(%rcx),%edx
665 andl %edi,%edx
666 CFI_REMEMBER_STATE
667 jnz retint_careful
668
669 retint_swapgs: /* return to user-space */
670 /*
671 * The iretq could re-enable interrupts:
672 */
673 DISABLE_INTERRUPTS(CLBR_ANY)
674 TRACE_IRQS_IRETQ
675 SWAPGS
676 jmp restore_args
677
678 retint_restore_args: /* return to kernel space */
679 DISABLE_INTERRUPTS(CLBR_ANY)
680 /*
681 * The iretq could re-enable interrupts:
682 */
683 TRACE_IRQS_IRETQ
684 restore_args:
685 RESTORE_ARGS 0,8,0
686
687 irq_return:
688 INTERRUPT_RETURN
689
690 .section __ex_table, "a"
691 .quad irq_return, bad_iret
692 .previous
693
694 #ifdef CONFIG_PARAVIRT
695 ENTRY(native_iret)
696 iretq
697
698 .section __ex_table,"a"
699 .quad native_iret, bad_iret
700 .previous
701 #endif
702
703 .section .fixup,"ax"
704 bad_iret:
705 /*
706 * The iret traps when the %cs or %ss being restored is bogus.
707 * We've lost the original trap vector and error code.
708 * #GPF is the most likely one to get for an invalid selector.
709 * So pretend we completed the iret and took the #GPF in user mode.
710 *
711 * We are now running with the kernel GS after exception recovery.
712 * But error_entry expects us to have user GS to match the user %cs,
713 * so swap back.
714 */
715 pushq $0
716
717 SWAPGS
718 jmp general_protection
719
720 .previous
721
722 /* edi: workmask, edx: work */
723 retint_careful:
724 CFI_RESTORE_STATE
725 bt $TIF_NEED_RESCHED,%edx
726 jnc retint_signal
727 TRACE_IRQS_ON
728 ENABLE_INTERRUPTS(CLBR_NONE)
729 pushq %rdi
730 CFI_ADJUST_CFA_OFFSET 8
731 call schedule
732 popq %rdi
733 CFI_ADJUST_CFA_OFFSET -8
734 GET_THREAD_INFO(%rcx)
735 DISABLE_INTERRUPTS(CLBR_NONE)
736 TRACE_IRQS_OFF
737 jmp retint_check
738
739 retint_signal:
740 testl $_TIF_DO_NOTIFY_MASK,%edx
741 jz retint_swapgs
742 TRACE_IRQS_ON
743 ENABLE_INTERRUPTS(CLBR_NONE)
744 SAVE_REST
745 movq $-1,ORIG_RAX(%rsp)
746 xorl %esi,%esi # oldset
747 movq %rsp,%rdi # &pt_regs
748 call do_notify_resume
749 RESTORE_REST
750 DISABLE_INTERRUPTS(CLBR_NONE)
751 TRACE_IRQS_OFF
752 movl $_TIF_NEED_RESCHED,%edi
753 GET_THREAD_INFO(%rcx)
754 jmp retint_check
755
756 #ifdef CONFIG_PREEMPT
757 /* Returning to kernel space. Check if we need preemption */
758 /* rcx: threadinfo. interrupts off. */
759 ENTRY(retint_kernel)
760 cmpl $0,threadinfo_preempt_count(%rcx)
761 jnz retint_restore_args
762 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
763 jnc retint_restore_args
764 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
765 jnc retint_restore_args
766 call preempt_schedule_irq
767 jmp exit_intr
768 #endif
769
770 CFI_ENDPROC
771 END(common_interrupt)
772
773 /*
774 * APIC interrupts.
775 */
776 .macro apicinterrupt num,func
777 INTR_FRAME
778 pushq $~(\num)
779 CFI_ADJUST_CFA_OFFSET 8
780 interrupt \func
781 jmp ret_from_intr
782 CFI_ENDPROC
783 .endm
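/*
 * Note that the vector is pushed bit-inverted (~num): the resulting
 * negative value lands in the orig_rax slot, which lets common code
 * tell an interrupt frame apart from a syscall frame (where orig_rax
 * holds the syscall number) and lets do_IRQ recover the vector.
 */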
784
785 ENTRY(thermal_interrupt)
786 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
787 END(thermal_interrupt)
788
789 ENTRY(threshold_interrupt)
790 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
791 END(threshold_interrupt)
792
793 #ifdef CONFIG_SMP
794 ENTRY(reschedule_interrupt)
795 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
796 END(reschedule_interrupt)
797
798 .macro INVALIDATE_ENTRY num
799 ENTRY(invalidate_interrupt\num)
800 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
801 END(invalidate_interrupt\num)
802 .endm
803
804 INVALIDATE_ENTRY 0
805 INVALIDATE_ENTRY 1
806 INVALIDATE_ENTRY 2
807 INVALIDATE_ENTRY 3
808 INVALIDATE_ENTRY 4
809 INVALIDATE_ENTRY 5
810 INVALIDATE_ENTRY 6
811 INVALIDATE_ENTRY 7
812
813 ENTRY(call_function_interrupt)
814 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
815 END(call_function_interrupt)
816 ENTRY(irq_move_cleanup_interrupt)
817 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
818 END(irq_move_cleanup_interrupt)
819 #endif
820
821 ENTRY(apic_timer_interrupt)
822 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
823 END(apic_timer_interrupt)
824
825 ENTRY(error_interrupt)
826 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
827 END(error_interrupt)
828
829 ENTRY(spurious_interrupt)
830 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
831 END(spurious_interrupt)
832
833 /*
834 * Exception entry points.
835 */
836 .macro zeroentry sym
837 INTR_FRAME
838 pushq $0 /* push error code/oldrax */
839 CFI_ADJUST_CFA_OFFSET 8
840 pushq %rax /* push real oldrax to the rdi slot */
841 CFI_ADJUST_CFA_OFFSET 8
842 CFI_REL_OFFSET rax,0
843 leaq \sym(%rip),%rax
844 jmp error_entry
845 CFI_ENDPROC
846 .endm
847
848 .macro errorentry sym
849 XCPT_FRAME
850 pushq %rax
851 CFI_ADJUST_CFA_OFFSET 8
852 CFI_REL_OFFSET rax,0
853 leaq \sym(%rip),%rax
854 jmp error_entry
855 CFI_ENDPROC
856 .endm
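/*
 * Convention shared by zeroentry/errorentry (see error_entry below):
 * on entry to error_entry the stack has the error code (or the 0
 * pushed by zeroentry) in the orig_rax slot, the real %rax parked in
 * the rdi slot, and the C handler's address in %rax.  error_entry
 * finishes saving registers and then does call *%rax with
 * %rdi = &pt_regs and %rsi = the error code.
 */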
857
858 /* error code is on the stack already */
859 /* handle NMI like exceptions that can happen everywhere */
860 .macro paranoidentry sym, ist=0, irqtrace=1
861 SAVE_ALL
862 cld
863 movl $1,%ebx
864 movl $MSR_GS_BASE,%ecx
865 rdmsr
866 testl %edx,%edx
867 js 1f
868 SWAPGS
869 xorl %ebx,%ebx
870 1:
871 .if \ist
872 movq %gs:pda_data_offset, %rbp
873 .endif
874 movq %rsp,%rdi
875 movq ORIG_RAX(%rsp),%rsi
876 movq $-1,ORIG_RAX(%rsp)
877 .if \ist
878 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
879 .endif
880 call \sym
881 .if \ist
882 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
883 .endif
884 DISABLE_INTERRUPTS(CLBR_NONE)
885 .if \irqtrace
886 TRACE_IRQS_OFF
887 .endif
888 .endm
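/*
 * %ebx is the "no swapgs" flag for the paranoid exit path: 1 means we
 * entered with the kernel GS base already loaded, 0 means we had to
 * SWAPGS here and must swap back before returning.  The rdmsr of
 * MSR_GS_BASE tests the sign of the upper half because a kernel GS
 * base is a kernel (negative) address -- an inferred rationale, but
 * the flag convention matches the exit code below.
 */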
889
890 /*
891 * "Paranoid" exit path from exception stack.
892 * Paranoid because this is used by NMIs and cannot take
893 * any kernel state for granted.
894 * We don't do kernel preemption checks here, because only
895 * NMI should be common and it does not enable IRQs and
896 * cannot get reschedule ticks.
897 *
898 * "trace" is 0 for the NMI handler only, because irq-tracing
899 * is fundamentally NMI-unsafe. (we cannot change the soft and
900 * hard flags at once, atomically)
901 */
902 .macro paranoidexit trace=1
903 /* ebx: no swapgs flag */
904 paranoid_exit\trace:
905 testl %ebx,%ebx /* swapgs needed? */
906 jnz paranoid_restore\trace
907 testl $3,CS(%rsp)
908 jnz paranoid_userspace\trace
909 paranoid_swapgs\trace:
910 .if \trace
911 TRACE_IRQS_IRETQ 0
912 .endif
913 SWAPGS_UNSAFE_STACK
914 paranoid_restore\trace:
915 RESTORE_ALL 8
916 jmp irq_return
917 paranoid_userspace\trace:
918 GET_THREAD_INFO(%rcx)
919 movl threadinfo_flags(%rcx),%ebx
920 andl $_TIF_WORK_MASK,%ebx
921 jz paranoid_swapgs\trace
922 movq %rsp,%rdi /* &pt_regs */
923 call sync_regs
924 movq %rax,%rsp /* switch stack for scheduling */
925 testl $_TIF_NEED_RESCHED,%ebx
926 jnz paranoid_schedule\trace
927 movl %ebx,%edx /* arg3: thread flags */
928 .if \trace
929 TRACE_IRQS_ON
930 .endif
931 ENABLE_INTERRUPTS(CLBR_NONE)
932 xorl %esi,%esi /* arg2: oldset */
933 movq %rsp,%rdi /* arg1: &pt_regs */
934 call do_notify_resume
935 DISABLE_INTERRUPTS(CLBR_NONE)
936 .if \trace
937 TRACE_IRQS_OFF
938 .endif
939 jmp paranoid_userspace\trace
940 paranoid_schedule\trace:
941 .if \trace
942 TRACE_IRQS_ON
943 .endif
944 ENABLE_INTERRUPTS(CLBR_ANY)
945 call schedule
946 DISABLE_INTERRUPTS(CLBR_ANY)
947 .if \trace
948 TRACE_IRQS_OFF
949 .endif
950 jmp paranoid_userspace\trace
951 CFI_ENDPROC
952 .endm
953
954 /*
955 * Exception entry point. This expects an error code/orig_rax on the stack
956 * and the exception handler in %rax.
957 */
958 KPROBE_ENTRY(error_entry)
959 _frame RDI
960 CFI_REL_OFFSET rax,0
961 /* rdi slot contains rax, oldrax contains error code */
962 cld
963 subq $14*8,%rsp
964 CFI_ADJUST_CFA_OFFSET (14*8)
965 movq %rsi,13*8(%rsp)
966 CFI_REL_OFFSET rsi,RSI
967 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
968 CFI_REGISTER rax,rsi
969 movq %rdx,12*8(%rsp)
970 CFI_REL_OFFSET rdx,RDX
971 movq %rcx,11*8(%rsp)
972 CFI_REL_OFFSET rcx,RCX
973 movq %rsi,10*8(%rsp) /* store rax */
974 CFI_REL_OFFSET rax,RAX
975 movq %r8, 9*8(%rsp)
976 CFI_REL_OFFSET r8,R8
977 movq %r9, 8*8(%rsp)
978 CFI_REL_OFFSET r9,R9
979 movq %r10,7*8(%rsp)
980 CFI_REL_OFFSET r10,R10
981 movq %r11,6*8(%rsp)
982 CFI_REL_OFFSET r11,R11
983 movq %rbx,5*8(%rsp)
984 CFI_REL_OFFSET rbx,RBX
985 movq %rbp,4*8(%rsp)
986 CFI_REL_OFFSET rbp,RBP
987 movq %r12,3*8(%rsp)
988 CFI_REL_OFFSET r12,R12
989 movq %r13,2*8(%rsp)
990 CFI_REL_OFFSET r13,R13
991 movq %r14,1*8(%rsp)
992 CFI_REL_OFFSET r14,R14
993 movq %r15,(%rsp)
994 CFI_REL_OFFSET r15,R15
995 xorl %ebx,%ebx
996 testl $3,CS(%rsp)
997 je error_kernelspace
998 error_swapgs:
999 SWAPGS
1000 error_sti:
1001 movq %rdi,RDI(%rsp)
1002 CFI_REL_OFFSET rdi,RDI
1003 movq %rsp,%rdi
1004 movq ORIG_RAX(%rsp),%rsi /* get error code */
1005 movq $-1,ORIG_RAX(%rsp)
1006 call *%rax
1007 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1008 error_exit:
1009 movl %ebx,%eax
1010 RESTORE_REST
1011 DISABLE_INTERRUPTS(CLBR_NONE)
1012 TRACE_IRQS_OFF
1013 GET_THREAD_INFO(%rcx)
1014 testl %eax,%eax
1015 jne retint_kernel
1016 LOCKDEP_SYS_EXIT_IRQ
1017 movl threadinfo_flags(%rcx),%edx
1018 movl $_TIF_WORK_MASK,%edi
1019 andl %edi,%edx
1020 jnz retint_careful
1021 jmp retint_swapgs
1022 CFI_ENDPROC
1023
1024 error_kernelspace:
1025 incl %ebx
1026 /* There are two places in the kernel that can potentially fault with
1027 usergs. Handle them here. The exception handlers after
1028 iret run with kernel gs again, so don't set the user space flag.
1029 B-stepping K8s sometimes report a truncated RIP for IRET
1030 exceptions returning to compat mode. Check for these here too. */
1031 leaq irq_return(%rip),%rbp
1032 cmpq %rbp,RIP(%rsp)
1033 je error_swapgs
1034 movl %ebp,%ebp /* zero extend */
1035 cmpq %rbp,RIP(%rsp)
1036 je error_swapgs
1037 cmpq $gs_change,RIP(%rsp)
1038 je error_swapgs
1039 jmp error_sti
1040 KPROBE_END(error_entry)
1041
1042 /* Reload gs selector with exception handling */
1043 /* edi: new selector */
1044 ENTRY(load_gs_index)
1045 CFI_STARTPROC
1046 pushf
1047 CFI_ADJUST_CFA_OFFSET 8
1048 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1049 SWAPGS
1050 gs_change:
1051 movl %edi,%gs
1052 2: mfence /* workaround */
1053 SWAPGS
1054 popf
1055 CFI_ADJUST_CFA_OFFSET -8
1056 ret
1057 CFI_ENDPROC
1058 ENDPROC(load_gs_index)
1059
1060 .section __ex_table,"a"
1061 .align 8
1062 .quad gs_change,bad_gs
1063 .previous
1064 .section .fixup,"ax"
1065 /* running with kernelgs */
1066 bad_gs:
1067 SWAPGS /* switch back to user gs */
1068 xorl %eax,%eax
1069 movl %eax,%gs
1070 jmp 2b
1071 .previous
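/*
 * The __ex_table entry above pairs the faulting instruction at
 * gs_change with the bad_gs fixup: if the new selector is bogus, the
 * fault handler resumes at bad_gs, which swaps GS back, loads a null
 * selector into %gs and jumps to label 2 as if the load had succeeded
 * (just with a null selector instead).
 */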
1072
1073 /*
1074 * Create a kernel thread.
1075 *
1076 * C extern interface:
1077 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1078 *
1079 * asm input arguments:
1080 * rdi: fn, rsi: arg, rdx: flags
1081 */
1082 ENTRY(kernel_thread)
1083 CFI_STARTPROC
1084 FAKE_STACK_FRAME $child_rip
1085 SAVE_ALL
1086
1087 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1088 movq %rdx,%rdi
1089 orq kernel_thread_flags(%rip),%rdi
1090 movq $-1, %rsi
1091 movq %rsp, %rdx
1092
1093 xorl %r8d,%r8d
1094 xorl %r9d,%r9d
1095
1096 # clone now
1097 call do_fork
1098 movq %rax,RAX(%rsp)
1099 xorl %edi,%edi
1100
1101 /*
1102 * It isn't worth checking for a reschedule here,
1103 * so internally to the x86_64 port you can rely on kernel_thread()
1104 * not rescheduling the child before returning; this avoids the need
1105 * for hacks, for example to fork off the per-CPU idle tasks.
1106 * [Hopefully no generic code relies on the reschedule -AK]
1107 */
1108 RESTORE_ALL
1109 UNFAKE_STACK_FRAME
1110 ret
1111 CFI_ENDPROC
1112 ENDPROC(kernel_thread)
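/*
 * How the pieces fit together (a sketch, inferred from the code):
 * kernel_thread() above builds a fake interrupt frame whose saved RIP
 * is child_rip, so when the new task is first scheduled it leaves the
 * kernel through the normal return path and "returns" into child_rip
 * below, which calls fn(arg) and then do_exit() with fn's return value.
 */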
1113
1114 child_rip:
1115 pushq $0 # fake return address
1116 CFI_STARTPROC
1117 /*
1118 * Here we are in the child and the registers are set as they were
1119 * at kernel_thread() invocation in the parent.
1120 */
1121 movq %rdi, %rax
1122 movq %rsi, %rdi
1123 call *%rax
1124 # exit
1125 mov %eax, %edi
1126 call do_exit
1127 CFI_ENDPROC
1128 ENDPROC(child_rip)
1129
1130 /*
1131 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1132 *
1133 * C extern interface:
1134 * extern long execve(char *name, char **argv, char **envp)
1135 *
1136 * asm input arguments:
1137 * rdi: name, rsi: argv, rdx: envp
1138 *
1139 * We want to fall back into:
1140 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
1141 *
1142 * do_sys_execve asm fallback arguments:
1143 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1144 */
1145 ENTRY(kernel_execve)
1146 CFI_STARTPROC
1147 FAKE_STACK_FRAME $0
1148 SAVE_ALL
1149 movq %rsp,%rcx
1150 call sys_execve
1151 movq %rax, RAX(%rsp)
1152 RESTORE_REST
1153 testq %rax,%rax
1154 je int_ret_from_sys_call
1155 RESTORE_ARGS
1156 UNFAKE_STACK_FRAME
1157 ret
1158 CFI_ENDPROC
1159 ENDPROC(kernel_execve)
1160
1161 KPROBE_ENTRY(page_fault)
1162 errorentry do_page_fault
1163 KPROBE_END(page_fault)
1164
1165 ENTRY(coprocessor_error)
1166 zeroentry do_coprocessor_error
1167 END(coprocessor_error)
1168
1169 ENTRY(simd_coprocessor_error)
1170 zeroentry do_simd_coprocessor_error
1171 END(simd_coprocessor_error)
1172
1173 ENTRY(device_not_available)
1174 zeroentry math_state_restore
1175 END(device_not_available)
1176
1177 /* runs on exception stack */
1178 KPROBE_ENTRY(debug)
1179 INTR_FRAME
1180 pushq $0
1181 CFI_ADJUST_CFA_OFFSET 8
1182 paranoidentry do_debug, DEBUG_STACK
1183 paranoidexit
1184 KPROBE_END(debug)
1185
1186 /* runs on exception stack */
1187 KPROBE_ENTRY(nmi)
1188 INTR_FRAME
1189 pushq $-1
1190 CFI_ADJUST_CFA_OFFSET 8
1191 paranoidentry do_nmi, 0, 0
1192 #ifdef CONFIG_TRACE_IRQFLAGS
1193 paranoidexit 0
1194 #else
1195 jmp paranoid_exit1
1196 CFI_ENDPROC
1197 #endif
1198 KPROBE_END(nmi)
1199
1200 KPROBE_ENTRY(int3)
1201 INTR_FRAME
1202 pushq $0
1203 CFI_ADJUST_CFA_OFFSET 8
1204 paranoidentry do_int3, DEBUG_STACK
1205 jmp paranoid_exit1
1206 CFI_ENDPROC
1207 KPROBE_END(int3)
1208
1209 ENTRY(overflow)
1210 zeroentry do_overflow
1211 END(overflow)
1212
1213 ENTRY(bounds)
1214 zeroentry do_bounds
1215 END(bounds)
1216
1217 ENTRY(invalid_op)
1218 zeroentry do_invalid_op
1219 END(invalid_op)
1220
1221 ENTRY(coprocessor_segment_overrun)
1222 zeroentry do_coprocessor_segment_overrun
1223 END(coprocessor_segment_overrun)
1224
1225 ENTRY(reserved)
1226 zeroentry do_reserved
1227 END(reserved)
1228
1229 /* runs on exception stack */
1230 ENTRY(double_fault)
1231 XCPT_FRAME
1232 paranoidentry do_double_fault
1233 jmp paranoid_exit1
1234 CFI_ENDPROC
1235 END(double_fault)
1236
1237 ENTRY(invalid_TSS)
1238 errorentry do_invalid_TSS
1239 END(invalid_TSS)
1240
1241 ENTRY(segment_not_present)
1242 errorentry do_segment_not_present
1243 END(segment_not_present)
1244
1245 /* runs on exception stack */
1246 ENTRY(stack_segment)
1247 XCPT_FRAME
1248 paranoidentry do_stack_segment
1249 jmp paranoid_exit1
1250 CFI_ENDPROC
1251 END(stack_segment)
1252
1253 KPROBE_ENTRY(general_protection)
1254 errorentry do_general_protection
1255 KPROBE_END(general_protection)
1256
1257 ENTRY(alignment_check)
1258 errorentry do_alignment_check
1259 END(alignment_check)
1260
1261 ENTRY(divide_error)
1262 zeroentry do_divide_error
1263 END(divide_error)
1264
1265 ENTRY(spurious_interrupt_bug)
1266 zeroentry do_spurious_interrupt_bug
1267 END(spurious_interrupt_bug)
1268
1269 #ifdef CONFIG_X86_MCE
1270 /* runs on exception stack */
1271 ENTRY(machine_check)
1272 INTR_FRAME
1273 pushq $0
1274 CFI_ADJUST_CFA_OFFSET 8
1275 paranoidentry do_machine_check
1276 jmp paranoid_exit1
1277 CFI_ENDPROC
1278 END(machine_check)
1279 #endif
1280
1281 /* Call softirq on interrupt stack. Interrupts are off. */
1282 ENTRY(call_softirq)
1283 CFI_STARTPROC
1284 push %rbp
1285 CFI_ADJUST_CFA_OFFSET 8
1286 CFI_REL_OFFSET rbp,0
1287 mov %rsp,%rbp
1288 CFI_DEF_CFA_REGISTER rbp
1289 incl %gs:pda_irqcount
1290 cmove %gs:pda_irqstackptr,%rsp
1291 push %rbp # backlink for old unwinder
1292 call __do_softirq
1293 leaveq
1294 CFI_DEF_CFA_REGISTER rsp
1295 CFI_ADJUST_CFA_OFFSET -8
1296 decl %gs:pda_irqcount
1297 ret
1298 CFI_ENDPROC
1299 ENDPROC(call_softirq)
1300
1301 KPROBE_ENTRY(ignore_sysret)
1302 CFI_STARTPROC
1303 mov $-ENOSYS,%eax
1304 sysret
1305 CFI_ENDPROC
1306 ENDPROC(ignore_sysret)