/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */
17 #include <linux/stackprotector.h>
18 #include <linux/cpu.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
22 #include <linux/kernel.h>
24 #include <linux/elfcore.h>
25 #include <linux/smp.h>
26 #include <linux/slab.h>
27 #include <linux/user.h>
28 #include <linux/interrupt.h>
29 #include <linux/utsname.h>
30 #include <linux/delay.h>
31 #include <linux/module.h>
32 #include <linux/ptrace.h>
33 #include <linux/notifier.h>
34 #include <linux/kprobes.h>
35 #include <linux/kdebug.h>
36 #include <linux/tick.h>
37 #include <linux/prctl.h>
38 #include <linux/uaccess.h>
40 #include <linux/ftrace.h>
41 #include <linux/dmi.h>
43 #include <asm/pgtable.h>
44 #include <asm/system.h>
45 #include <asm/processor.h>
47 #include <asm/mmu_context.h>
48 #include <asm/prctl.h>
50 #include <asm/proto.h>
53 #include <asm/syscalls.h>
55 #include <asm/debugreg.h>
57 asmlinkage
extern void ret_from_fork(void);
59 DEFINE_PER_CPU(unsigned long, old_rsp
);
60 static DEFINE_PER_CPU(unsigned char, is_idle
);
62 unsigned long kernel_thread_flags
= CLONE_VM
| CLONE_UNTRACED
;
64 static ATOMIC_NOTIFIER_HEAD(idle_notifier
);
66 void idle_notifier_register(struct notifier_block
*n
)
68 atomic_notifier_chain_register(&idle_notifier
, n
);
70 EXPORT_SYMBOL_GPL(idle_notifier_register
);
72 void idle_notifier_unregister(struct notifier_block
*n
)
74 atomic_notifier_chain_unregister(&idle_notifier
, n
);
76 EXPORT_SYMBOL_GPL(idle_notifier_unregister
);
80 percpu_write(is_idle
, 1);
81 atomic_notifier_call_chain(&idle_notifier
, IDLE_START
, NULL
);
84 static void __exit_idle(void)
86 if (x86_test_and_clear_bit_percpu(0, is_idle
) == 0)
88 atomic_notifier_call_chain(&idle_notifier
, IDLE_END
, NULL
);
91 /* Called from interrupts to signify idle end */
94 /* idle loop has pid 0 */
/*
 * Without CPU hotplug an offline CPU can never reach the idle loop,
 * so getting here is a bug.
 */
static inline void play_dead(void)
{
	BUG();
}
108 * The idle thread. There's no useful work to be
109 * done, so just try to conserve power and have a
110 * low exit latency (ie sit in a loop waiting for
111 * somebody to say that they'd like to reschedule)
115 current_thread_info()->status
|= TS_POLLING
;
118 * If we're the non-boot CPU, nothing set the stack canary up
119 * for us. CPU0 already has it initialized but no harm in
120 * doing it again. This is a good place for updating it, as
121 * we wont ever return from this function (so the invalid
122 * canaries already on the stack wont ever trigger).
124 boot_init_stack_canary();
126 /* endless idle loop with no priority at all */
128 tick_nohz_stop_sched_tick(1);
129 while (!need_resched()) {
133 if (cpu_is_offline(smp_processor_id()))
136 * Idle routines should keep interrupts disabled
137 * from here on, until they go to idle.
138 * Otherwise, idle callbacks can misfire.
142 /* Don't trace irqs off for idle */
143 stop_critical_timings();
145 start_critical_timings();
146 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle
148 loops can be woken up without interrupt. */
152 tick_nohz_restart_sched_tick();
153 preempt_enable_no_resched();
159 /* Prints also some state that isn't saved in the pt_regs */
160 void __show_regs(struct pt_regs
*regs
, int all
)
162 unsigned long cr0
= 0L, cr2
= 0L, cr3
= 0L, cr4
= 0L, fs
, gs
, shadowgs
;
163 unsigned long d0
, d1
, d2
, d3
, d6
, d7
;
164 unsigned int fsindex
, gsindex
;
165 unsigned int ds
, cs
, es
;
170 board
= dmi_get_system_info(DMI_PRODUCT_NAME
);
173 printk(KERN_INFO
"Pid: %d, comm: %.20s %s %s %.*s %s\n",
174 current
->pid
, current
->comm
, print_tainted(),
175 init_utsname()->release
,
176 (int)strcspn(init_utsname()->version
, " "),
177 init_utsname()->version
, board
);
178 printk(KERN_INFO
"RIP: %04lx:[<%016lx>] ", regs
->cs
& 0xffff, regs
->ip
);
179 printk_address(regs
->ip
, 1);
180 printk(KERN_INFO
"RSP: %04lx:%016lx EFLAGS: %08lx\n", regs
->ss
,
181 regs
->sp
, regs
->flags
);
182 printk(KERN_INFO
"RAX: %016lx RBX: %016lx RCX: %016lx\n",
183 regs
->ax
, regs
->bx
, regs
->cx
);
184 printk(KERN_INFO
"RDX: %016lx RSI: %016lx RDI: %016lx\n",
185 regs
->dx
, regs
->si
, regs
->di
);
186 printk(KERN_INFO
"RBP: %016lx R08: %016lx R09: %016lx\n",
187 regs
->bp
, regs
->r8
, regs
->r9
);
188 printk(KERN_INFO
"R10: %016lx R11: %016lx R12: %016lx\n",
189 regs
->r10
, regs
->r11
, regs
->r12
);
190 printk(KERN_INFO
"R13: %016lx R14: %016lx R15: %016lx\n",
191 regs
->r13
, regs
->r14
, regs
->r15
);
193 asm("movl %%ds,%0" : "=r" (ds
));
194 asm("movl %%cs,%0" : "=r" (cs
));
195 asm("movl %%es,%0" : "=r" (es
));
196 asm("movl %%fs,%0" : "=r" (fsindex
));
197 asm("movl %%gs,%0" : "=r" (gsindex
));
199 rdmsrl(MSR_FS_BASE
, fs
);
200 rdmsrl(MSR_GS_BASE
, gs
);
201 rdmsrl(MSR_KERNEL_GS_BASE
, shadowgs
);
211 printk(KERN_INFO
"FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
212 fs
, fsindex
, gs
, gsindex
, shadowgs
);
213 printk(KERN_INFO
"CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs
, ds
,
215 printk(KERN_INFO
"CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2
, cr3
,
221 printk(KERN_INFO
"DR0: %016lx DR1: %016lx DR2: %016lx\n", d0
, d1
, d2
);
225 printk(KERN_INFO
"DR3: %016lx DR6: %016lx DR7: %016lx\n", d3
, d6
, d7
);
228 void show_regs(struct pt_regs
*regs
)
230 show_registers(regs
);
231 show_trace(NULL
, regs
, (void *)(regs
+ 1), regs
->bp
);
234 void release_thread(struct task_struct
*dead_task
)
237 if (dead_task
->mm
->context
.size
) {
238 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
240 dead_task
->mm
->context
.ldt
,
241 dead_task
->mm
->context
.size
);
247 static inline void set_32bit_tls(struct task_struct
*t
, int tls
, u32 addr
)
249 struct user_desc ud
= {
256 struct desc_struct
*desc
= t
->thread
.tls_array
;
261 static inline u32
read_32bit_tls(struct task_struct
*t
, int tls
)
263 return get_desc_base(&t
->thread
.tls_array
[tls
]);
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
275 int copy_thread(unsigned long clone_flags
, unsigned long sp
,
276 unsigned long unused
,
277 struct task_struct
*p
, struct pt_regs
*regs
)
280 struct pt_regs
*childregs
;
281 struct task_struct
*me
= current
;
283 childregs
= ((struct pt_regs
*)
284 (THREAD_SIZE
+ task_stack_page(p
))) - 1;
290 childregs
->sp
= (unsigned long)childregs
;
292 p
->thread
.sp
= (unsigned long) childregs
;
293 p
->thread
.sp0
= (unsigned long) (childregs
+1);
294 p
->thread
.usersp
= me
->thread
.usersp
;
296 set_tsk_thread_flag(p
, TIF_FORK
);
298 p
->thread
.fs
= me
->thread
.fs
;
299 p
->thread
.gs
= me
->thread
.gs
;
300 p
->thread
.io_bitmap_ptr
= NULL
;
302 savesegment(gs
, p
->thread
.gsindex
);
303 savesegment(fs
, p
->thread
.fsindex
);
304 savesegment(es
, p
->thread
.es
);
305 savesegment(ds
, p
->thread
.ds
);
308 memset(p
->thread
.ptrace_bps
, 0, sizeof(p
->thread
.ptrace_bps
));
310 if (unlikely(test_tsk_thread_flag(me
, TIF_IO_BITMAP
))) {
311 p
->thread
.io_bitmap_ptr
= kmalloc(IO_BITMAP_BYTES
, GFP_KERNEL
);
312 if (!p
->thread
.io_bitmap_ptr
) {
313 p
->thread
.io_bitmap_max
= 0;
316 memcpy(p
->thread
.io_bitmap_ptr
, me
->thread
.io_bitmap_ptr
,
318 set_tsk_thread_flag(p
, TIF_IO_BITMAP
);
322 * Set a new TLS for the child thread?
324 if (clone_flags
& CLONE_SETTLS
) {
325 #ifdef CONFIG_IA32_EMULATION
326 if (test_thread_flag(TIF_IA32
))
327 err
= do_set_thread_area(p
, -1,
328 (struct user_desc __user
*)childregs
->si
, 0);
331 err
= do_arch_prctl(p
, ARCH_SET_FS
, childregs
->r8
);
336 clear_tsk_thread_flag(p
, TIF_DS_AREA_MSR
);
337 p
->thread
.ds_ctx
= NULL
;
339 clear_tsk_thread_flag(p
, TIF_DEBUGCTLMSR
);
340 p
->thread
.debugctlmsr
= 0;
344 if (err
&& p
->thread
.io_bitmap_ptr
) {
345 kfree(p
->thread
.io_bitmap_ptr
);
346 p
->thread
.io_bitmap_max
= 0;
353 start_thread(struct pt_regs
*regs
, unsigned long new_ip
, unsigned long new_sp
)
361 percpu_write(old_rsp
, new_sp
);
362 regs
->cs
= __USER_CS
;
363 regs
->ss
= __USER_DS
;
367 * Free the old FP and other extended state
369 free_thread_xstate(current
);
371 EXPORT_SYMBOL_GPL(start_thread
);
374 * switch_to(x,y) should switch tasks from x to y.
376 * This could still be optimized:
377 * - fold all the options into a flag word and test it with a single test.
378 * - could test fs/gs bitsliced
380 * Kprobes not supported here. Set the probe on schedule instead.
381 * Function graph tracer not supported too.
383 __notrace_funcgraph
struct task_struct
*
384 __switch_to(struct task_struct
*prev_p
, struct task_struct
*next_p
)
386 struct thread_struct
*prev
= &prev_p
->thread
;
387 struct thread_struct
*next
= &next_p
->thread
;
388 int cpu
= smp_processor_id();
389 struct tss_struct
*tss
= &per_cpu(init_tss
, cpu
);
390 unsigned fsindex
, gsindex
;
394 * If the task has used fpu the last 5 timeslices, just do a full
395 * restore of the math state immediately to avoid the trap; the
396 * chances of needing FPU soon are obviously high now
398 preload_fpu
= tsk_used_math(next_p
) && next_p
->fpu_counter
> 5;
400 /* we're going to use this soon, after a few expensive things */
402 prefetch(next
->xstate
);
405 * Reload esp0, LDT and the page table pointer:
411 * This won't pick up thread selector changes, but I guess that is ok.
413 savesegment(es
, prev
->es
);
414 if (unlikely(next
->es
| prev
->es
))
415 loadsegment(es
, next
->es
);
417 savesegment(ds
, prev
->ds
);
418 if (unlikely(next
->ds
| prev
->ds
))
419 loadsegment(ds
, next
->ds
);
422 /* We must save %fs and %gs before load_TLS() because
423 * %fs and %gs may be cleared by load_TLS().
425 * (e.g. xen_load_tls())
427 savesegment(fs
, fsindex
);
428 savesegment(gs
, gsindex
);
432 /* Must be after DS reload */
435 /* Make sure cpu is ready for new context */
440 * Leave lazy mode, flushing any hypercalls made here.
441 * This must be done before restoring TLS segments so
442 * the GDT and LDT are properly updated, and must be
443 * done before math_state_restore, so the TS bit is up
446 arch_end_context_switch(next_p
);
451 * Segment register != 0 always requires a reload. Also
452 * reload when it has changed. When prev process used 64bit
453 * base always reload to avoid an information leak.
455 if (unlikely(fsindex
| next
->fsindex
| prev
->fs
)) {
456 loadsegment(fs
, next
->fsindex
);
458 * Check if the user used a selector != 0; if yes
459 * clear 64bit base, since overloaded base is always
460 * mapped to the Null selector
465 /* when next process has a 64bit base use it */
467 wrmsrl(MSR_FS_BASE
, next
->fs
);
468 prev
->fsindex
= fsindex
;
470 if (unlikely(gsindex
| next
->gsindex
| prev
->gs
)) {
471 load_gs_index(next
->gsindex
);
476 wrmsrl(MSR_KERNEL_GS_BASE
, next
->gs
);
477 prev
->gsindex
= gsindex
;
480 * Switch the PDA and FPU contexts.
482 prev
->usersp
= percpu_read(old_rsp
);
483 percpu_write(old_rsp
, next
->usersp
);
484 percpu_write(current_task
, next_p
);
486 percpu_write(kernel_stack
,
487 (unsigned long)task_stack_page(next_p
) +
488 THREAD_SIZE
- KERNEL_STACK_OFFSET
);
491 * Now maybe reload the debug registers and handle I/O bitmaps
493 if (unlikely(task_thread_info(next_p
)->flags
& _TIF_WORK_CTXSW_NEXT
||
494 task_thread_info(prev_p
)->flags
& _TIF_WORK_CTXSW_PREV
))
495 __switch_to_xtra(prev_p
, next_p
, tss
);
498 * Preload the FPU context, now that we've determined that the
499 * task is likely to be using it.
502 __math_state_restore();
508 * sys_execve() executes a new program.
511 long sys_execve(char __user
*name
, char __user
* __user
*argv
,
512 char __user
* __user
*envp
, struct pt_regs
*regs
)
517 filename
= getname(name
);
518 error
= PTR_ERR(filename
);
519 if (IS_ERR(filename
))
521 error
= do_execve(filename
, argv
, envp
, regs
);
526 void set_personality_64bit(void)
528 /* inherit personality from parent */
530 /* Make sure to be in 64bit mode */
531 clear_thread_flag(TIF_IA32
);
533 /* TBD: overwrites user setup. Should have two bits.
534 But 64bit processes have always behaved this way,
535 so it's not too bad. The main problem is just that
536 32bit childs are affected again. */
537 current
->personality
&= ~READ_IMPLIES_EXEC
;
541 sys_clone(unsigned long clone_flags
, unsigned long newsp
,
542 void __user
*parent_tid
, void __user
*child_tid
, struct pt_regs
*regs
)
546 return do_fork(clone_flags
, newsp
, regs
, 0, parent_tid
, child_tid
);
549 unsigned long get_wchan(struct task_struct
*p
)
555 if (!p
|| p
== current
|| p
->state
== TASK_RUNNING
)
557 stack
= (unsigned long)task_stack_page(p
);
558 if (p
->thread
.sp
< stack
|| p
->thread
.sp
>= stack
+THREAD_SIZE
)
560 fp
= *(u64
*)(p
->thread
.sp
);
562 if (fp
< (unsigned long)stack
||
563 fp
>= (unsigned long)stack
+THREAD_SIZE
)
566 if (!in_sched_functions(ip
))
569 } while (count
++ < 16);
573 long do_arch_prctl(struct task_struct
*task
, int code
, unsigned long addr
)
576 int doit
= task
== current
;
581 if (addr
>= TASK_SIZE_OF(task
))
584 /* handle small bases via the GDT because that's faster to
586 if (addr
<= 0xffffffff) {
587 set_32bit_tls(task
, GS_TLS
, addr
);
589 load_TLS(&task
->thread
, cpu
);
590 load_gs_index(GS_TLS_SEL
);
592 task
->thread
.gsindex
= GS_TLS_SEL
;
595 task
->thread
.gsindex
= 0;
596 task
->thread
.gs
= addr
;
599 ret
= checking_wrmsrl(MSR_KERNEL_GS_BASE
, addr
);
605 /* Not strictly needed for fs, but do it for symmetry
607 if (addr
>= TASK_SIZE_OF(task
))
610 /* handle small bases via the GDT because that's faster to
612 if (addr
<= 0xffffffff) {
613 set_32bit_tls(task
, FS_TLS
, addr
);
615 load_TLS(&task
->thread
, cpu
);
616 loadsegment(fs
, FS_TLS_SEL
);
618 task
->thread
.fsindex
= FS_TLS_SEL
;
621 task
->thread
.fsindex
= 0;
622 task
->thread
.fs
= addr
;
624 /* set the selector to 0 to not confuse
627 ret
= checking_wrmsrl(MSR_FS_BASE
, addr
);
634 if (task
->thread
.fsindex
== FS_TLS_SEL
)
635 base
= read_32bit_tls(task
, FS_TLS
);
637 rdmsrl(MSR_FS_BASE
, base
);
639 base
= task
->thread
.fs
;
640 ret
= put_user(base
, (unsigned long __user
*)addr
);
646 if (task
->thread
.gsindex
== GS_TLS_SEL
)
647 base
= read_32bit_tls(task
, GS_TLS
);
649 savesegment(gs
, gsindex
);
651 rdmsrl(MSR_KERNEL_GS_BASE
, base
);
653 base
= task
->thread
.gs
;
655 base
= task
->thread
.gs
;
656 ret
= put_user(base
, (unsigned long __user
*)addr
);
668 long sys_arch_prctl(int code
, unsigned long addr
)
670 return do_arch_prctl(current
, code
, addr
);
673 unsigned long KSTK_ESP(struct task_struct
*task
)
675 return (test_tsk_thread_flag(task
, TIF_IA32
)) ?
676 (task_pt_regs(task
)->sp
) : ((task
)->thread
.usersp
);