2 * linux/arch/x86_64/entry.S
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
12 * entry.S contains the system-call and fault low-level handling routines.
14 * NOTE: This code handles signal-recognition, which happens every time
15 * after an interrupt and after each system call.
17 * Normal syscalls and interrupts don't save a full stack frame, this is
18 * only done for syscall tracing, signals or fork/exec et al.
20 * A note on terminology:
21 * - top of stack: Architecture defined interrupt frame from SS to RIP
22 * at the top of the kernel process stack.
23 * - partial stack frame: partially saved registers up to R11.
24 * - full stack frame: Like partial stack frame, but all registers saved.
27 * - schedule it carefully for the final hardware.
31 #include <linux/config.h>
32 #include <linux/linkage.h>
33 #include <asm/segment.h>
35 #include <asm/cache.h>
36 #include <asm/errno.h>
37 #include <asm/dwarf2.h>
38 #include <asm/calling.h>
39 #include <asm/offset.h>
41 #include <asm/unistd.h>
42 #include <asm/thread_info.h>
43 #include <asm/hw_irq.h>
48 #define preempt_stop cli
51 #define retint_kernel retint_restore_args
55 * C code is not supposed to know about undefined top of stack. Every time
56 * a C function with an pt_regs argument is called from the SYSCALL based
57 * fast path FIXUP_TOP_OF_STACK is needed.
58 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
62 /* %rsp:at FRAMEEND */
63 .macro FIXUP_TOP_OF_STACK tmp
64 movq %gs:pda_oldrsp,\tmp
66 movq $__USER_DS,SS(%rsp)
67 movq $__USER_CS,CS(%rsp)
69 movq R11(%rsp),\tmp /* get eflags */
70 movq \tmp,EFLAGS(%rsp)
73 .macro RESTORE_TOP_OF_STACK tmp,offset=0
74 movq RSP-\offset(%rsp),\tmp
75 movq \tmp,%gs:pda_oldrsp
76 movq EFLAGS-\offset(%rsp),\tmp
77 movq \tmp,R11-\offset(%rsp)
80 .macro FAKE_STACK_FRAME child_rip
81 /* push in order ss, rsp, eflags, cs, rip */
84 CFI_ADJUST_CFA_OFFSET 8
86 CFI_ADJUST_CFA_OFFSET 8
88 pushq $(1<<9) /* eflags - interrupts on */
89 CFI_ADJUST_CFA_OFFSET 8
90 pushq $__KERNEL_CS /* cs */
91 CFI_ADJUST_CFA_OFFSET 8
92 pushq \child_rip /* rip */
93 CFI_ADJUST_CFA_OFFSET 8
95 pushq %rax /* orig rax */
96 CFI_ADJUST_CFA_OFFSET 8
99 .macro UNFAKE_STACK_FRAME
101 CFI_ADJUST_CFA_OFFSET -(6*8)
104 .macro CFI_DEFAULT_STACK
105 CFI_ADJUST_CFA_OFFSET (SS)
106 CFI_OFFSET r15,R15-SS
107 CFI_OFFSET r14,R14-SS
108 CFI_OFFSET r13,R13-SS
109 CFI_OFFSET r12,R12-SS
110 CFI_OFFSET rbp,RBP-SS
111 CFI_OFFSET rbx,RBX-SS
112 CFI_OFFSET r11,R11-SS
113 CFI_OFFSET r10,R10-SS
116 CFI_OFFSET rax,RAX-SS
117 CFI_OFFSET rcx,RCX-SS
118 CFI_OFFSET rdx,RDX-SS
119 CFI_OFFSET rsi,RSI-SS
120 CFI_OFFSET rdi,RDI-SS
121 CFI_OFFSET rsp,RSP-SS
122 CFI_OFFSET rip,RIP-SS
125 * A newly forked process directly context switches into this.
132 GET_THREAD_INFO(%rcx)
133 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
137 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
138 je int_ret_from_sys_call
139 testl $_TIF_IA32,threadinfo_flags(%rcx)
140 jnz int_ret_from_sys_call
141 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
142 jmp ret_from_sys_call
145 call syscall_trace_leave
146 GET_THREAD_INFO(%rcx)
151 * System call entry. Upto 6 arguments in registers are supported.
153 * SYSCALL does not save anything on the stack and does not change the
159 * rax system call number
161 * rcx return address for syscall/sysret, C arg3
164 * r10 arg3 (--> moved to rcx for C)
167 * r11 eflags for syscall/sysret, temporary for C
168 * r12-r15,rbp,rbx saved by C code, not touched.
170 * Interrupts are off on entry.
171 * Only called from user space.
173 * XXX if we had a free scratch register we could save the RSP into the stack frame
174 * and report it properly in ps. Unfortunately we haven't.
180 movq %rsp,%gs:pda_oldrsp
181 movq %gs:pda_kernelstack,%rsp
184 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
185 movq %rcx,RIP-ARGOFFSET(%rsp)
186 GET_THREAD_INFO(%rcx)
187 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
189 cmpq $__NR_syscall_max,%rax
192 call *sys_call_table(,%rax,8) # XXX: rip relative
193 movq %rax,RAX-ARGOFFSET(%rsp)
195 * Syscall return path ending with SYSRET (fast path)
196 * Has incomplete stack frame and undefined top of stack.
198 .globl ret_from_sys_call
200 movl $_TIF_WORK_MASK,%edi
203 GET_THREAD_INFO(%rcx)
205 movl threadinfo_flags(%rcx),%edx
208 movq RIP-ARGOFFSET(%rsp),%rcx
209 RESTORE_ARGS 0,-ARG_SKIP,1
210 movq %gs:pda_oldrsp,%rsp
214 /* Handle reschedules */
215 /* edx: work, edi: workmask */
217 bt $TIF_NEED_RESCHED,%edx
225 /* Handle a signal */
228 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
231 /* Really a signal */
232 /* edx: work flags (arg3) */
233 leaq do_notify_resume(%rip),%rax
234 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
235 xorl %esi,%esi # oldset -> arg2
236 call ptregscall_common
237 1: movl $_TIF_NEED_RESCHED,%edi
240 /* Do syscall tracing */
243 movq $-ENOSYS,RAX(%rsp)
244 FIXUP_TOP_OF_STACK %rdi
246 call syscall_trace_enter
247 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
249 cmpq $__NR_syscall_max,%rax
251 movq %r10,%rcx /* fixup for C */
252 call *sys_call_table(,%rax,8)
253 movq %rax,RAX-ARGOFFSET(%rsp)
256 call syscall_trace_leave
257 RESTORE_TOP_OF_STACK %rbx
259 jmp ret_from_sys_call
262 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
263 jmp ret_from_sys_call
266 * Syscall return path ending with IRET.
267 * Has correct top of stack, but partial stack frame.
269 ENTRY(int_ret_from_sys_call)
271 testl $3,CS-ARGOFFSET(%rsp)
272 je retint_restore_args
273 movl $_TIF_ALLWORK_MASK,%edi
274 /* edi: mask to check */
276 GET_THREAD_INFO(%rcx)
277 movl threadinfo_flags(%rcx),%edx
282 /* Either reschedule or signal or syscall exit tracking needed. */
283 /* First do a reschedule test. */
284 /* edx: work, edi: workmask */
286 bt $TIF_NEED_RESCHED,%edx
294 /* handle signals and tracing -- both require a full stack frame */
298 /* Check for syscall exit trace */
299 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
302 leaq 8(%rsp),%rdi # &ptregs -> arg1
303 call syscall_trace_leave
305 btr $TIF_SYSCALL_TRACE,%edi
306 btr $TIF_SYSCALL_AUDIT,%edi
307 btr $TIF_SINGLESTEP,%edi
311 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
313 movq %rsp,%rdi # &ptregs -> arg1
314 xorl %esi,%esi # oldset -> arg2
315 call do_notify_resume
316 1: movl $_TIF_NEED_RESCHED,%edi
323 * Certain special system calls that need to save a complete full stack frame.
326 .macro PTREGSCALL label,func,arg
329 leaq \func(%rip),%rax
330 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
331 jmp ptregscall_common
334 PTREGSCALL stub_clone, sys_clone, %r8
335 PTREGSCALL stub_fork, sys_fork, %rdi
336 PTREGSCALL stub_vfork, sys_vfork, %rdi
337 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
338 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
339 PTREGSCALL stub_iopl, sys_iopl, %rsi
341 ENTRY(ptregscall_common)
344 CFI_ADJUST_CFA_OFFSET -8
347 FIXUP_TOP_OF_STACK %r11
349 RESTORE_TOP_OF_STACK %r11
353 CFI_ADJUST_CFA_OFFSET 8
360 CFI_ADJUST_CFA_OFFSET -8
363 FIXUP_TOP_OF_STACK %r11
365 GET_THREAD_INFO(%rcx)
366 bt $TIF_IA32,threadinfo_flags(%rcx)
368 RESTORE_TOP_OF_STACK %r11
375 CFI_ADJUST_CFA_OFFSET REST_SKIP
378 jmp int_ret_from_sys_call
382 * sigreturn is special because it needs to restore all registers on return.
383 * This cannot be done with SYSRET, so use the IRET return path instead.
385 ENTRY(stub_rt_sigreturn)
390 FIXUP_TOP_OF_STACK %r11
391 call sys_rt_sigreturn
392 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
394 jmp int_ret_from_sys_call
398 * Interrupt entry/exit.
400 * Interrupt entry points save only callee clobbered registers in fast path.
402 * Entry runs with interrupts off.
405 /* 0(%rsp): interrupt number */
406 .macro interrupt func
408 CFI_DEF_CFA rsp,(SS-RDI)
409 CFI_REL_OFFSET rsp,(RSP-ORIG_RAX)
410 CFI_REL_OFFSET rip,(RIP-ORIG_RAX)
412 #ifdef CONFIG_DEBUG_INFO
416 * Setup a stack frame pointer. This allows gdb to trace
417 * back to the original stack.
420 CFI_DEF_CFA_REGISTER rbp
423 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
428 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
429 movq %gs:pda_irqstackptr,%rax
431 pushq %rdi # save old stack
435 ENTRY(common_interrupt)
437 /* 0(%rsp): oldrsp-ARGOFFSET */
441 subl $1,%gs:pda_irqcount
442 #ifdef CONFIG_DEBUG_INFO
445 leaq ARGOFFSET(%rdi),%rsp
447 GET_THREAD_INFO(%rcx)
448 testl $3,CS-ARGOFFSET(%rsp)
451 /* Interrupt came from user space */
453 * Has a correct top of stack, but a partial stack frame
454 * %rcx: thread info. Interrupts off.
456 retint_with_reschedule:
457 movl $_TIF_WORK_MASK,%edi
459 movl threadinfo_flags(%rcx),%edx
471 .section __ex_table,"a"
472 .quad iret_label,bad_iret
475 /* force a signal here? this matches i386 behaviour */
476 /* running with kernel gs */
478 movq $-9999,%rdi /* better code? */
482 /* edi: workmask, edx: work */
484 bt $TIF_NEED_RESCHED,%edx
490 GET_THREAD_INFO(%rcx)
495 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
499 movq $-1,ORIG_RAX(%rsp)
500 xorq %rsi,%rsi # oldset
501 movq %rsp,%rdi # &pt_regs
502 call do_notify_resume
505 movl $_TIF_NEED_RESCHED,%edi
506 GET_THREAD_INFO(%rcx)
509 #ifdef CONFIG_PREEMPT
510 /* Returning to kernel space. Check if we need preemption */
511 /* rcx: threadinfo. interrupts off. */
514 cmpl $0,threadinfo_preempt_count(%rcx)
515 jnz retint_restore_args
516 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
517 jnc retint_restore_args
518 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
519 jnc retint_restore_args
520 call preempt_schedule_irq
528 .macro apicinterrupt num,func
535 ENTRY(thermal_interrupt)
536 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
539 ENTRY(reschedule_interrupt)
540 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
542 ENTRY(invalidate_interrupt)
543 apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
545 ENTRY(call_function_interrupt)
546 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
549 #ifdef CONFIG_X86_LOCAL_APIC
550 ENTRY(apic_timer_interrupt)
551 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
553 ENTRY(error_interrupt)
554 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
556 ENTRY(spurious_interrupt)
557 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
561 * Exception entry points.
564 pushq $0 /* push error code/oldrax */
565 pushq %rax /* push real oldrax to the rdi slot */
570 .macro errorentry sym
576 /* error code is on the stack already */
577 /* handle NMI like exceptions that can happen everywhere */
578 .macro paranoidentry sym
582 movl $MSR_GS_BASE,%ecx
589 movq ORIG_RAX(%rsp),%rsi
590 movq $-1,ORIG_RAX(%rsp)
595 * Exception entry point. This expects an error code/orig_rax on the stack
596 * and the exception handler in %rax.
600 CFI_DEF_CFA rsp,(SS-RDI)
601 CFI_REL_OFFSET rsp,(RSP-RDI)
602 CFI_REL_OFFSET rip,(RIP-RDI)
603 /* rdi slot contains rax, oldrax contains error code */
606 CFI_ADJUST_CFA_OFFSET (14*8)
608 CFI_REL_OFFSET rsi,RSI
609 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
611 CFI_REL_OFFSET rdx,RDX
613 CFI_REL_OFFSET rcx,RCX
614 movq %rsi,10*8(%rsp) /* store rax */
615 CFI_REL_OFFSET rax,RAX
621 CFI_REL_OFFSET r10,R10
623 CFI_REL_OFFSET r11,R11
625 CFI_REL_OFFSET rbx,RBX
627 CFI_REL_OFFSET rbp,RBP
629 CFI_REL_OFFSET r12,R12
631 CFI_REL_OFFSET r13,R13
633 CFI_REL_OFFSET r14,R14
635 CFI_REL_OFFSET r15,R15
644 movq ORIG_RAX(%rsp),%rsi /* get error code */
645 movq $-1,ORIG_RAX(%rsp)
647 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
652 GET_THREAD_INFO(%rcx)
655 movl threadinfo_flags(%rcx),%edx
656 movl $_TIF_WORK_MASK,%edi
666 /* There are two places in the kernel that can potentially fault with
667 usergs. Handle them here. The exception handlers after
668 iret run with kernel gs again, so don't set the user space flag.
669 B stepping K8s sometimes report an truncated RIP for IRET
670 exceptions returning to compat mode. Check for these here too. */
671 leaq iret_label(%rip),%rbp
674 movl %ebp,%ebp /* zero extend */
677 cmpq $gs_change,RIP(%rsp)
681 /* Reload gs selector with exception handling */
682 /* edi: new selector */
689 2: mfence /* workaround */
694 .section __ex_table,"a"
696 .quad gs_change,bad_gs
699 /* running with kernelgs */
701 swapgs /* switch back to user gs */
708 * Create a kernel thread.
710 * C extern interface:
711 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
713 * asm input arguments:
714 * rdi: fn, rsi: arg, rdx: flags
718 FAKE_STACK_FRAME $child_rip
721 # rdi: flags, rsi: usp, rdx: will be &pt_regs
723 orq kernel_thread_flags(%rip),%rdi
736 * It isn't worth to check for reschedule here,
737 * so internally to the x86_64 port you can rely on kernel_thread()
738 * not to reschedule the child before returning, this avoids the need
739 * of hacks for example to fork off the per-CPU idle tasks.
740 * [Hopefully no generic code relies on the reschedule -AK]
750 * Here we are in the child and the registers are set as they were
751 * at kernel_thread() invocation in the parent.
761 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
763 * C extern interface:
764 * extern long execve(char *name, char **argv, char **envp)
766 * asm input arguments:
767 * rdi: name, rsi: argv, rdx: envp
769 * We want to fallback into:
770 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
772 * do_sys_execve asm fallback arguments:
773 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
783 je int_ret_from_sys_call
790 errorentry do_page_fault
792 ENTRY(coprocessor_error)
793 zeroentry do_coprocessor_error
795 ENTRY(simd_coprocessor_error)
796 zeroentry do_simd_coprocessor_error
798 ENTRY(device_not_available)
799 zeroentry math_state_restore
801 /* runs on exception stack */
805 CFI_ADJUST_CFA_OFFSET 8
806 paranoidentry do_debug
807 /* switch back to process stack to restore the state ptrace touched */
810 jnz paranoid_userspace
814 /* runs on exception stack */
818 CFI_ADJUST_CFA_OFFSET 8
820 /* ebx: no swapgs flag */
822 testl %ebx,%ebx /* swapgs needed? */
832 GET_THREAD_INFO(%rcx)
833 movl threadinfo_flags(%rcx),%edx
834 testl $_TIF_NEED_RESCHED,%edx
836 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
845 xorl %esi,%esi /* oldset */
846 movq %rsp,%rdi /* &pt_regs */
847 call do_notify_resume
855 zeroentry do_overflow
861 zeroentry do_invalid_op
863 ENTRY(coprocessor_segment_overrun)
864 zeroentry do_coprocessor_segment_overrun
867 zeroentry do_reserved
869 /* runs on exception stack */
872 paranoidentry do_double_fault
875 jnz paranoid_userspace
880 errorentry do_invalid_TSS
882 ENTRY(segment_not_present)
883 errorentry do_segment_not_present
885 /* runs on exception stack */
888 paranoidentry do_stack_segment
891 jnz paranoid_userspace
895 ENTRY(general_protection)
896 errorentry do_general_protection
898 ENTRY(alignment_check)
899 errorentry do_alignment_check
902 zeroentry do_divide_error
904 ENTRY(spurious_interrupt_bug)
905 zeroentry do_spurious_interrupt_bug
907 #ifdef CONFIG_X86_MCE
908 /* runs on exception stack */
912 CFI_ADJUST_CFA_OFFSET 8
913 paranoidentry do_machine_check
919 zeroentry do_call_debug