linux-2.6/mini2440.git: arch/x86/kernel/process_64.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

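/*
 * Idle notifiers: subsystems that want to know when a CPU enters or
 * leaves the idle loop can register on this chain and will be called
 * with IDLE_START / IDLE_END events from enter_idle() / __exit_idle().
 */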
void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        local_irq_disable();
        if (!need_resched()) {
                ktime_t t0, t1;
                u64 t0n, t1n;

                t0 = ktime_get();
                t0n = ktime_to_ns(t0);
                safe_halt();    /* enables interrupts racelessly */
                local_irq_disable();
                t1 = ktime_get();
                t1n = ktime_to_ns(t1);
                sched_clock_idle_wakeup_event(t1n - t0n);
        }
        local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();
        cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        idle();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

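/*
 * cpu_idle_wait() waits until every online CPU has left any old pm_idle
 * routine and picked up the current one.  Each CPU's cpu_idle_state flag
 * is set here and cleared again in cpu_idle(); CPUs that are already
 * idle with nothing to do are nudged with a no-op cross call
 * (do_nothing) so they cycle through the idle loop once more.
 */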
static void do_nothing(void *unused)
{
}

void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map, tmp = current->cpus_allowed;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
                /*
                 * We waited 1 sec, if a CPU still did not call idle
                 * it may be because it is in idle and not waking up
                 * because it has nothing to do.
                 * Give all the remaining CPUS a kick.
                 */
                smp_call_function_mask(map, do_nothing, 0, 0);
        } while (!cpus_empty(map));

        set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __mwait(ax, cx);
        }
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
        if (!need_resched()) {
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
                        local_irq_enable();
        } else {
                local_irq_enable();
        }
}

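/*
 * MWAIT is only used as the idle routine when the CPU advertises at
 * least one C1 sub-state: CPUID leaf 5 must exist and the sub-state
 * count in EDX bits 7:4 must be non-zero.  Booting with "idle=mwait"
 * (force_mwait) overrides this check.
 */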
static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
{
        if (force_mwait)
                return 1;
        /* Any C1 states supported? */
        return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
        static int selected;

        if (selected)
                return;
#ifdef CONFIG_X86_SMP
        if (pm_idle == poll_idle && smp_num_siblings > 1) {
                printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
        }
#endif
        if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                /*
                 * Skip, if setup has overridden idle.
                 * One CPU supports mwait => All CPUs supports mwait
                 */
                if (!pm_idle) {
                        printk(KERN_INFO "using mwait in idle threads.\n");
                        pm_idle = mwait_idle;
                }
        }
        selected = 1;
}

static int __init idle_setup(char *str)
{
        if (!strcmp(str, "poll")) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        } else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else
                return -1;

        boot_option_idle_override = 1;
        return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

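/*
 * flush_thread() is called at exec time: it resets the debug registers,
 * TLS slots and FPU state inherited from the old image, and flips the
 * IA-32 compatibility flags if a different ABI was requested via
 * TIF_ABI_PENDING.
 */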
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

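/*
 * Set up the kernel stack and thread_struct of a newly forked task:
 * copy the parent's pt_regs to the top of the child's stack (with ax
 * forced to 0 so the child sees a zero return value), duplicate the
 * I/O bitmap if one is in use, and honour CLONE_SETTLS by installing
 * the requested TLS descriptor or FS base in the child.
 */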
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread,
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter>5)
                prefetch(&next->i387.fxsave);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        if (next_p->fpu_counter>5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit childs are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

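/*
 * get_wchan(): walk the saved frame-pointer chain of a sleeping task to
 * find the first return address that is not inside the scheduler, i.e.
 * the place where the task is blocked.  Gives up after 16 frames or if
 * a frame pointer leaves the task's stack.
 */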
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp,ip;
        int count = 0;

        if (!p || p == current || p->state==TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

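/*
 * do_arch_prctl() implements ARCH_SET_FS / ARCH_SET_GS and the matching
 * ARCH_GET_* calls.  Bases that fit in 32 bits are installed through a
 * GDT TLS slot (cheaper to switch); larger bases go straight into
 * MSR_FS_BASE / MSR_KERNEL_GS_BASE with the selector cleared.
 */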
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

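/*
 * Stack and brk randomization: unless the task disabled it via
 * ADDR_NO_RANDOMIZE (or randomize_va_space is off), the initial stack
 * pointer is shifted down by up to 8 KB and the heap start by up to
 * 32 MB, both suitably aligned.
 */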
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;

        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}