x86: use the correct cpuid method to detect MWAIT support for C states
arch/x86/kernel/process_64.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */
#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}
/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}
/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                ktime_t t0, t1;
                u64 t0n, t1n;

                t0 = ktime_get();
                t0n = ktime_to_ns(t0);
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
                t1 = ktime_get();
                t1n = ktime_to_ns(t1);
                sched_clock_idle_wakeup_event(t1n - t0n);
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}
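/*
 * The ktime samples taken around safe_halt() above measure how long the CPU
 * was actually halted; the delta is reported to
 * sched_clock_idle_wakeup_event() so the scheduler clock can account for the
 * time spent idle.
 */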
/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}
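/*
 * With NO_HZ configured, tick_nohz_stop_sched_tick() switches the periodic
 * timer tick off for the whole inner need_resched() loop above, and
 * tick_nohz_restart_sched_tick() brings it back before schedule() runs.
 */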
static void do_nothing(void *unused)
{
}

void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map, tmp = current->cpus_allowed;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
                /*
                 * We waited 1 sec, if a CPU still did not call idle
                 * it may be because it is in idle and not waking up
                 * because it has nothing to do.
                 * Give all the remaining CPUS a kick.
                 */
                smp_call_function_mask(map, do_nothing, 0, 0);
        } while (!cpus_empty(map));

        set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
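/*
 * cpu_idle_wait() is called after pm_idle has been changed: it does not
 * return until every online CPU has gone through the idle loop at least once
 * (or has been kicked out of the old idle routine by do_nothing()), so the
 * previous idle handler is guaranteed to no longer be in use.
 */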
/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}
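/*
 * MWAIT hint encoding (illustrative summary): the EAX argument selects the
 * target C-state in bits 7:4 and a sub-C-state in bits 3:0, while setting
 * bit 0 of ECX asks the CPU to treat masked interrupts as break events, so
 * MWAIT can be entered with interrupts disabled and still be woken by one.
 */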
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}
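/*
 * __sti_mwait() issues STI immediately before MWAIT, so interrupts only
 * become deliverable in the instruction shadow right before the CPU goes to
 * sleep; a wakeup interrupt therefore cannot slip in between the
 * need_resched() check and the MWAIT itself.
 */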
static int mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}
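/*
 * CPUID leaf 5 describes MONITOR/MWAIT: EDX reports, four bits per C-state,
 * how many sub-C-states MWAIT supports (bits 3:0 for C0, bits 7:4 for C1,
 * and so on).  mwait_usable() therefore requires the leaf to exist and at
 * least one C1 sub-state to be advertised, unless "idle=mwait" forced MWAIT
 * on at boot.
 */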
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int printed;

        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => all CPUs support mwait
                 */
                if (!pm_idle) {
                        if (!printed) {
                                printk(KERN_INFO "using mwait in idle threads.\n");
                                printed = 1;
                        }
                        pm_idle = mwait_idle;
                }
        }
}
static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);
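/*
 * Boot options handled above:
 *   idle=poll   - busy-poll need_resched() instead of halting the CPU
 *   idle=mwait  - force MWAIT idle even if CPUID does not advertise a C1 state
 */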
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}
void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}
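/*
 * Bases below 4GB are installed as 32-bit TLS descriptors in the GDT, so
 * switching them only costs a segment register reload; do_arch_prctl() below
 * uses this for small ARCH_SET_FS/ARCH_SET_GS bases instead of writing
 * MSR_FS_BASE/MSR_KERNEL_GS_BASE.
 */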
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}
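/*
 * Note: an sp of ~0UL is the convention used when creating kernel threads,
 * which have no user stack; in that case the child's stack pointer is simply
 * pointed at its own saved pt_regs on the kernel stack.
 */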
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(&next->i387.fxsave);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
                  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}
/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        putname(filename);
        return error;
}
void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}
asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}
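/*
 * get_wchan() walks the sleeping task's frame pointer chain: the saved RBP
 * sits at the top of the switched-out stack and the return address at fp+8.
 * The walk stops at the first return address outside the scheduler (the
 * function the task is blocked in), giving up after 16 frames or if a frame
 * pointer leaves the task's stack.
 */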
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}
long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}
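/*
 * Userspace reaches do_arch_prctl() through the arch_prctl(2) syscall.
 * Illustrative call sequence only (names from <asm/prctl.h>, invoked via
 * syscall(2); not part of this file):
 *
 *      unsigned long base = (unsigned long)tls_block;
 *      syscall(SYS_arch_prctl, ARCH_SET_FS, base);
 *      syscall(SYS_arch_prctl, ARCH_GET_FS, (unsigned long)&base);
 */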
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}
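/*
 * arch_align_stack() randomizes the initial user stack pointer by up to 8KB
 * (unless randomization is disabled for the task) and then rounds down to a
 * 16-byte boundary to keep the ABI-mandated stack alignment.
 */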
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;

        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
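/*
 * arch_randomize_brk() places the heap start at a random page-aligned
 * address within 32MB above the ELF-provided brk, falling back to the
 * original brk if randomize_range() fails.
 */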